1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2018 Joyent, Inc. All rights reserved.
28 * Copyright (c) 2012 by Delphix. All rights reserved.
29 */
30
31 #include <mdb/mdb_param.h>
32 #include <mdb/mdb_modapi.h>
33 #include <mdb/mdb_ctf.h>
34 #include <mdb/mdb_whatis.h>
35 #include <sys/cpuvar.h>
36 #include <sys/kmem_impl.h>
37 #include <sys/vmem_impl.h>
38 #include <sys/machelf.h>
39 #include <sys/modctl.h>
40 #include <sys/kobj.h>
41 #include <sys/panic.h>
42 #include <sys/stack.h>
43 #include <sys/sysmacros.h>
44 #include <vm/page.h>
45
46 #include "avl.h"
47 #include "combined.h"
48 #include "dist.h"
49 #include "kmem.h"
50 #include "list.h"
51
52 #define dprintf(x) if (mdb_debug_level) { \
53 mdb_printf("kmem debug: "); \
54 /*CSTYLED*/\
55 mdb_printf x ;\
56 }
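/*
 * dprintf() takes a doubly-parenthesized argument list so that a full
 * mdb_printf() argument list can pass through the single macro parameter.
 * A minimal usage sketch, matching the calls later in this file:
 *
 *	dprintf(("reading %d loaded rounds\n", rounds));
 *
 * Nothing is printed unless mdb_debug_level has been toggled on via
 * kmem_debug() below.
 */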
57
58 #define KM_ALLOCATED 0x01
59 #define KM_FREE 0x02
60 #define KM_BUFCTL 0x04
61 #define KM_CONSTRUCTED 0x08 /* only constructed free buffers */
62 #define KM_HASH 0x10
63
64 static int mdb_debug_level = 0;
65
66 /*ARGSUSED*/
67 static int
68 kmem_init_walkers(uintptr_t addr, const kmem_cache_t *c, void *ignored)
69 {
70 mdb_walker_t w;
71 char descr[64];
72
73 (void) mdb_snprintf(descr, sizeof (descr),
74 "walk the %s cache", c->cache_name);
75
76 w.walk_name = c->cache_name;
77 w.walk_descr = descr;
78 w.walk_init = kmem_walk_init;
79 w.walk_step = kmem_walk_step;
80 w.walk_fini = kmem_walk_fini;
81 w.walk_init_arg = (void *)addr;
82
83 if (mdb_add_walker(&w) == -1)
84 mdb_warn("failed to add %s walker", c->cache_name);
85
86 return (WALK_NEXT);
87 }
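/*
 * Each walker registered above is named after its cache, so any cache can
 * be walked by name once the walkers are initialized.  A hypothetical
 * session (the cache name is illustrative):
 *
 *	> ::walk kmem_alloc_96
 *
 * This is equivalent to walking "kmem" with walk_init_arg preset to the
 * address of that cache.
 */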
88
89 /*ARGSUSED*/
90 int
91 kmem_debug(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
92 {
93 mdb_debug_level ^= 1;
94
95 mdb_printf("kmem: debugging is now %s\n",
96 mdb_debug_level ? "on" : "off");
97
98 return (DCMD_OK);
99 }
100
101 int
102 kmem_cache_walk_init(mdb_walk_state_t *wsp)
103 {
104 GElf_Sym sym;
105
106 if (mdb_lookup_by_name("kmem_caches", &sym) == -1) {
107 mdb_warn("couldn't find kmem_caches");
108 return (WALK_ERR);
109 }
110
111 wsp->walk_addr = (uintptr_t)sym.st_value;
112
113 return (list_walk_init_named(wsp, "cache list", "cache"));
114 }
115
116 int
117 kmem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
118 {
119 if (wsp->walk_addr == NULL) {
120 mdb_warn("kmem_cpu_cache doesn't support global walks");
121 return (WALK_ERR);
122 }
123
124 if (mdb_layered_walk("cpu", wsp) == -1) {
125 mdb_warn("couldn't walk 'cpu'");
126 return (WALK_ERR);
127 }
128
129 wsp->walk_data = (void *)wsp->walk_addr;
130
131 return (WALK_NEXT);
132 }
133
134 int
135 kmem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
136 {
137 uintptr_t caddr = (uintptr_t)wsp->walk_data;
138 const cpu_t *cpu = wsp->walk_layer;
139 kmem_cpu_cache_t cc;
140
141 caddr += OFFSETOF(kmem_cache_t, cache_cpu[cpu->cpu_seqid]);
142
143 if (mdb_vread(&cc, sizeof (kmem_cpu_cache_t), caddr) == -1) {
144 mdb_warn("couldn't read kmem_cpu_cache at %p", caddr);
145 return (WALK_ERR);
146 }
147
148 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
149 }
150
151 static int
152 kmem_slab_check(void *p, uintptr_t saddr, void *arg)
153 {
154 kmem_slab_t *sp = p;
155 uintptr_t caddr = (uintptr_t)arg;
156 if ((uintptr_t)sp->slab_cache != caddr) {
157 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
158 saddr, caddr, sp->slab_cache);
159 return (-1);
160 }
161
162 return (0);
163 }
164
165 static int
166 kmem_partial_slab_check(void *p, uintptr_t saddr, void *arg)
167 {
168 kmem_slab_t *sp = p;
169
170 int rc = kmem_slab_check(p, saddr, arg);
171 if (rc != 0) {
172 return (rc);
173 }
174
175 if (!KMEM_SLAB_IS_PARTIAL(sp)) {
176 mdb_warn("slab %p is not a partial slab\n", saddr);
177 return (-1);
178 }
179
180 return (0);
181 }
182
183 static int
184 kmem_complete_slab_check(void *p, uintptr_t saddr, void *arg)
185 {
186 kmem_slab_t *sp = p;
187
188 int rc = kmem_slab_check(p, saddr, arg);
189 if (rc != 0) {
190 return (rc);
191 }
192
193 if (!KMEM_SLAB_IS_ALL_USED(sp)) {
194 mdb_warn("slab %p is not completely allocated\n", saddr);
195 return (-1);
196 }
197
198 return (0);
199 }
200
201 typedef struct {
202 uintptr_t kns_cache_addr;
203 int kns_nslabs;
204 } kmem_nth_slab_t;
205
206 static int
207 kmem_nth_slab_check(void *p, uintptr_t saddr, void *arg)
208 {
209 kmem_nth_slab_t *chkp = arg;
210
211 int rc = kmem_slab_check(p, saddr, (void *)chkp->kns_cache_addr);
212 if (rc != 0) {
213 return (rc);
214 }
215
216 return (chkp->kns_nslabs-- == 0 ? 1 : 0);
217 }
218
219 static int
220 kmem_complete_slab_walk_init(mdb_walk_state_t *wsp)
221 {
222 uintptr_t caddr = wsp->walk_addr;
223
224 wsp->walk_addr = (uintptr_t)(caddr +
225 offsetof(kmem_cache_t, cache_complete_slabs));
226
227 return (list_walk_init_checked(wsp, "slab list", "slab",
228 kmem_complete_slab_check, (void *)caddr));
229 }
230
231 static int
232 kmem_partial_slab_walk_init(mdb_walk_state_t *wsp)
233 {
234 uintptr_t caddr = wsp->walk_addr;
235
236 wsp->walk_addr = (uintptr_t)(caddr +
237 offsetof(kmem_cache_t, cache_partial_slabs));
238
239 return (avl_walk_init_checked(wsp, "slab list", "slab",
240 kmem_partial_slab_check, (void *)caddr));
241 }
242
243 int
244 kmem_slab_walk_init(mdb_walk_state_t *wsp)
245 {
246 uintptr_t caddr = wsp->walk_addr;
247
248 if (caddr == NULL) {
249 mdb_warn("kmem_slab doesn't support global walks\n");
250 return (WALK_ERR);
251 }
252
253 combined_walk_init(wsp);
254 combined_walk_add(wsp,
255 kmem_complete_slab_walk_init, list_walk_step, list_walk_fini);
256 combined_walk_add(wsp,
257 kmem_partial_slab_walk_init, avl_walk_step, avl_walk_fini);
258
259 return (WALK_NEXT);
260 }
261
262 static int
263 kmem_first_complete_slab_walk_init(mdb_walk_state_t *wsp)
264 {
265 uintptr_t caddr = wsp->walk_addr;
266 kmem_nth_slab_t *chk;
267
268 chk = mdb_alloc(sizeof (kmem_nth_slab_t),
269 UM_SLEEP | UM_GC);
270 chk->kns_cache_addr = caddr;
271 chk->kns_nslabs = 1;
272 wsp->walk_addr = (uintptr_t)(caddr +
273 offsetof(kmem_cache_t, cache_complete_slabs));
274
275 return (list_walk_init_checked(wsp, "slab list", "slab",
276 kmem_nth_slab_check, chk));
277 }
278
279 int
280 kmem_slab_walk_partial_init(mdb_walk_state_t *wsp)
281 {
282 uintptr_t caddr = wsp->walk_addr;
283 kmem_cache_t c;
284
285 if (caddr == NULL) {
286 mdb_warn("kmem_slab_partial doesn't support global walks\n");
287 return (WALK_ERR);
288 }
289
290 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
291 mdb_warn("couldn't read kmem_cache at %p", caddr);
292 return (WALK_ERR);
293 }
294
295 combined_walk_init(wsp);
296
297 /*
298 * Some consumers (kmem_walk_step(), in particular) require at
299 * least one callback if there are any buffers in the cache. So
300 * if there are *no* partial slabs, report the first full slab, if
301 * any.
302 *
303 * Yes, this is ugly, but it's cleaner than the other possibilities.
304 */
305 if (c.cache_partial_slabs.avl_numnodes == 0) {
306 combined_walk_add(wsp, kmem_first_complete_slab_walk_init,
307 list_walk_step, list_walk_fini);
308 } else {
309 combined_walk_add(wsp, kmem_partial_slab_walk_init,
310 avl_walk_step, avl_walk_fini);
311 }
312
313 return (WALK_NEXT);
314 }
315
316 int
317 kmem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
318 {
319 kmem_cache_t c;
320 const char *filter = NULL;
321
322 if (mdb_getopts(ac, argv,
323 'n', MDB_OPT_STR, &filter,
324 NULL) != ac) {
325 return (DCMD_USAGE);
326 }
327
328 if (!(flags & DCMD_ADDRSPEC)) {
329 if (mdb_walk_dcmd("kmem_cache", "kmem_cache", ac, argv) == -1) {
330 mdb_warn("can't walk kmem_cache");
331 return (DCMD_ERR);
332 }
333 return (DCMD_OK);
334 }
335
336 if (DCMD_HDRSPEC(flags))
337 mdb_printf("%-?s %-25s %4s %6s %8s %8s\n", "ADDR", "NAME",
338 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
339
340 if (mdb_vread(&c, sizeof (c), addr) == -1) {
341 mdb_warn("couldn't read kmem_cache at %p", addr);
342 return (DCMD_ERR);
343 }
344
345 if ((filter != NULL) && (strstr(c.cache_name, filter) == NULL))
346 return (DCMD_OK);
347
348 mdb_printf("%0?p %-25s %04x %06x %8ld %8lld\n", addr, c.cache_name,
349 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
350
351 return (DCMD_OK);
352 }
353
354 void
355 kmem_cache_help(void)
356 {
357 mdb_printf("%s", "Print kernel memory caches.\n\n");
358 mdb_dec_indent(2);
359 mdb_printf("%<b>OPTIONS%</b>\n");
360 mdb_inc_indent(2);
361 mdb_printf("%s",
362 " -n name\n"
363 " name of kmem cache (or matching partial name)\n"
364 "\n"
365 "Column\tDescription\n"
366 "\n"
367 "ADDR\t\taddress of kmem cache\n"
368 "NAME\t\tname of kmem cache\n"
369 "FLAG\t\tvarious cache state flags\n"
370 "CFLAG\t\tcache creation flags\n"
371 "BUFSIZE\tobject size in bytes\n"
372 "BUFTOTL\tcurrent total buffers in cache (allocated and free)\n");
373 }
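/*
 * A sketch of typical use (the address and counts shown are hypothetical;
 * the column layout follows the mdb_printf() calls in kmem_cache() above):
 *
 *	> ::kmem_cache -n kmem_alloc_8
 *	ADDR             NAME                      FLAG  CFLAG  BUFSIZE  BUFTOTL
 *	fffffffc8000d008 kmem_alloc_8              020f 200000        8     1234
 */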
374
375 #define LABEL_WIDTH 11
376 static void
377 kmem_slabs_print_dist(uint_t *ks_bucket, size_t buffers_per_slab,
378 size_t maxbuckets, size_t minbucketsize)
379 {
380 uint64_t total;
381 int buckets;
382 int i;
383 const int *distarray;
384 int complete[2];
385
386 buckets = buffers_per_slab;
387
388 total = 0;
389 for (i = 0; i <= buffers_per_slab; i++)
390 total += ks_bucket[i];
391
392 if (maxbuckets > 1)
393 buckets = MIN(buckets, maxbuckets);
394
395 if (minbucketsize > 1) {
396 /*
397 * minbucketsize does not apply to the first bucket reserved
398 * for completely allocated slabs
399 */
400 buckets = MIN(buckets, 1 + ((buffers_per_slab - 1) /
401 minbucketsize));
402 if ((buckets < 2) && (buffers_per_slab > 1)) {
403 buckets = 2;
404 minbucketsize = (buffers_per_slab - 1);
405 }
406 }
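/*
 * Illustrative arithmetic (hypothetical values): with buffers_per_slab
 * = 32 and minbucketsize = 8, the cap above is 1 + ((32 - 1) / 8) = 4
 * buckets -- one reserved for completely allocated slabs plus three
 * covering the 1..31 range.
 */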
407
408 /*
409 * The first printed bucket is reserved for completely allocated slabs.
410 * Passing (buckets - 1) excludes that bucket from the generated
411 * distribution, since we're handling it as a special case.
412 */
413 complete[0] = buffers_per_slab;
414 complete[1] = buffers_per_slab + 1;
415 distarray = dist_linear(buckets - 1, 1, buffers_per_slab - 1);
416
417 mdb_printf("%*s\n", LABEL_WIDTH, "Allocated");
418 dist_print_header("Buffers", LABEL_WIDTH, "Slabs");
419
420 dist_print_bucket(complete, 0, ks_bucket, total, LABEL_WIDTH);
421 /*
422 * Print bucket ranges in descending order after the first bucket for
423 * completely allocated slabs, so a person can see immediately whether
424 * or not there is fragmentation without having to scan possibly
425 * multiple screens of output. Starting at (buckets - 2) excludes the
426 * extra terminating bucket.
427 */
428 for (i = buckets - 2; i >= 0; i--) {
429 dist_print_bucket(distarray, i, ks_bucket, total, LABEL_WIDTH);
430 }
431 mdb_printf("\n");
432 }
433 #undef LABEL_WIDTH
434
435 /*ARGSUSED*/
436 static int
437 kmem_first_slab(uintptr_t addr, const kmem_slab_t *sp, boolean_t *is_slab)
438 {
439 *is_slab = B_TRUE;
440 return (WALK_DONE);
441 }
442
443 /*ARGSUSED*/
444 static int
445 kmem_first_partial_slab(uintptr_t addr, const kmem_slab_t *sp,
446 boolean_t *is_slab)
447 {
448 /*
449 * The "kmem_partial_slab" walker reports the first full slab if there
450 * are no partial slabs (for the sake of consumers that require at least
451 * one callback if there are any buffers in the cache).
452 */
453 *is_slab = KMEM_SLAB_IS_PARTIAL(sp);
454 return (WALK_DONE);
455 }
456
457 typedef struct kmem_slab_usage {
458 int ksu_refcnt; /* count of allocated buffers on slab */
459 boolean_t ksu_nomove; /* slab marked non-reclaimable */
460 } kmem_slab_usage_t;
461
462 typedef struct kmem_slab_stats {
463 const kmem_cache_t *ks_cp;
464 int ks_slabs; /* slabs in cache */
465 int ks_partial_slabs; /* partially allocated slabs in cache */
466 uint64_t ks_unused_buffers; /* total unused buffers in cache */
467 int ks_max_buffers_per_slab; /* max buffers per slab */
468 int ks_usage_len; /* ks_usage array length */
469 kmem_slab_usage_t *ks_usage; /* partial slab usage */
470 uint_t *ks_bucket; /* slab usage distribution */
471 } kmem_slab_stats_t;
472
473 /*ARGSUSED*/
474 static int
475 kmem_slablist_stat(uintptr_t addr, const kmem_slab_t *sp,
476 kmem_slab_stats_t *ks)
477 {
478 kmem_slab_usage_t *ksu;
479 long unused;
480
481 ks->ks_slabs++;
482 ks->ks_bucket[sp->slab_refcnt]++;
483
484 unused = (sp->slab_chunks - sp->slab_refcnt);
485 if (unused == 0) {
486 return (WALK_NEXT);
487 }
488
489 ks->ks_partial_slabs++;
490 ks->ks_unused_buffers += unused;
491
492 if (ks->ks_partial_slabs > ks->ks_usage_len) {
493 kmem_slab_usage_t *usage;
494 int len = ks->ks_usage_len;
495
496 len = (len == 0 ? 16 : len * 2);
497 usage = mdb_zalloc(len * sizeof (kmem_slab_usage_t), UM_SLEEP);
498 if (ks->ks_usage != NULL) {
499 bcopy(ks->ks_usage, usage,
500 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
501 mdb_free(ks->ks_usage,
502 ks->ks_usage_len * sizeof (kmem_slab_usage_t));
503 }
504 ks->ks_usage = usage;
505 ks->ks_usage_len = len;
506 }
507
508 ksu = &ks->ks_usage[ks->ks_partial_slabs - 1];
509 ksu->ksu_refcnt = sp->slab_refcnt;
510 ksu->ksu_nomove = (sp->slab_flags & KMEM_SLAB_NOMOVE);
511 return (WALK_NEXT);
512 }
513
514 static void
515 kmem_slabs_header()
516 {
517 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
518 "", "", "Partial", "", "Unused", "");
519 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
520 "Cache Name", "Slabs", "Slabs", "Buffers", "Buffers", "Waste");
521 mdb_printf("%-25s %8s %8s %9s %9s %6s\n",
522 "-------------------------", "--------", "--------", "---------",
523 "---------", "------");
524 }
525
526 int
527 kmem_slabs(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
528 {
529 kmem_cache_t c;
530 kmem_slab_stats_t stats;
531 mdb_walk_cb_t cb;
532 int pct;
533 int tenths_pct;
534 size_t maxbuckets = 1;
535 size_t minbucketsize = 0;
536 const char *filter = NULL;
537 const char *name = NULL;
538 uint_t opt_v = FALSE;
539 boolean_t buckets = B_FALSE;
540 boolean_t skip = B_FALSE;
541
542 if (mdb_getopts(argc, argv,
543 'B', MDB_OPT_UINTPTR, &minbucketsize,
544 'b', MDB_OPT_UINTPTR, &maxbuckets,
545 'n', MDB_OPT_STR, &filter,
546 'N', MDB_OPT_STR, &name,
547 'v', MDB_OPT_SETBITS, TRUE, &opt_v,
548 NULL) != argc) {
549 return (DCMD_USAGE);
550 }
551
552 if ((maxbuckets != 1) || (minbucketsize != 0)) {
553 buckets = B_TRUE;
554 }
555
556 if (!(flags & DCMD_ADDRSPEC)) {
557 if (mdb_walk_dcmd("kmem_cache", "kmem_slabs", argc,
558 argv) == -1) {
559 mdb_warn("can't walk kmem_cache");
560 return (DCMD_ERR);
561 }
562 return (DCMD_OK);
563 }
564
565 if (mdb_vread(&c, sizeof (c), addr) == -1) {
566 mdb_warn("couldn't read kmem_cache at %p", addr);
567 return (DCMD_ERR);
568 }
569
570 if (name == NULL) {
571 skip = ((filter != NULL) &&
572 (strstr(c.cache_name, filter) == NULL));
573 } else if (filter == NULL) {
574 skip = (strcmp(c.cache_name, name) != 0);
575 } else {
576 /* match either -n or -N */
577 skip = ((strcmp(c.cache_name, name) != 0) &&
578 (strstr(c.cache_name, filter) == NULL));
579 }
580
581 if (!(opt_v || buckets) && DCMD_HDRSPEC(flags)) {
582 kmem_slabs_header();
583 } else if ((opt_v || buckets) && !skip) {
584 if (DCMD_HDRSPEC(flags)) {
585 kmem_slabs_header();
586 } else {
587 boolean_t is_slab = B_FALSE;
588 const char *walker_name;
589 if (opt_v) {
590 cb = (mdb_walk_cb_t)kmem_first_partial_slab;
591 walker_name = "kmem_slab_partial";
592 } else {
593 cb = (mdb_walk_cb_t)kmem_first_slab;
594 walker_name = "kmem_slab";
595 }
596 (void) mdb_pwalk(walker_name, cb, &is_slab, addr);
597 if (is_slab) {
598 kmem_slabs_header();
599 }
600 }
601 }
602
603 if (skip) {
604 return (DCMD_OK);
605 }
606
607 bzero(&stats, sizeof (kmem_slab_stats_t));
608 stats.ks_cp = &c;
609 stats.ks_max_buffers_per_slab = c.cache_maxchunks;
610 /* +1 to include a zero bucket */
611 stats.ks_bucket = mdb_zalloc((stats.ks_max_buffers_per_slab + 1) *
612 sizeof (*stats.ks_bucket), UM_SLEEP);
613 cb = (mdb_walk_cb_t)kmem_slablist_stat;
614 (void) mdb_pwalk("kmem_slab", cb, &stats, addr);
615
616 if (c.cache_buftotal == 0) {
617 pct = 0;
618 tenths_pct = 0;
619 } else {
620 uint64_t n = stats.ks_unused_buffers * 10000;
621 pct = (int)(n / c.cache_buftotal);
622 tenths_pct = pct - ((pct / 100) * 100);
623 tenths_pct = (tenths_pct + 5) / 10; /* round nearest tenth */
624 if (tenths_pct == 10) {
625 pct += 100;
626 tenths_pct = 0;
627 }
628 }
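/*
 * Illustrative arithmetic (hypothetical counts): with cache_buftotal =
 * 1000 and ks_unused_buffers = 87, n = 870000, pct = 870 and tenths_pct
 * = (70 + 5) / 10 = 7; after the pct /= 100 below, the row prints a
 * waste figure of "8.7%".
 */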
629
630 pct /= 100;
631 mdb_printf("%-25s %8d %8d %9lld %9lld %3d.%1d%%\n", c.cache_name,
632 stats.ks_slabs, stats.ks_partial_slabs, c.cache_buftotal,
633 stats.ks_unused_buffers, pct, tenths_pct);
634
635 if (maxbuckets == 0) {
636 maxbuckets = stats.ks_max_buffers_per_slab;
637 }
638
639 if (((maxbuckets > 1) || (minbucketsize > 0)) &&
640 (stats.ks_slabs > 0)) {
641 mdb_printf("\n");
642 kmem_slabs_print_dist(stats.ks_bucket,
643 stats.ks_max_buffers_per_slab, maxbuckets, minbucketsize);
644 }
645
646 mdb_free(stats.ks_bucket, (stats.ks_max_buffers_per_slab + 1) *
647 sizeof (*stats.ks_bucket));
648
649 if (!opt_v) {
650 return (DCMD_OK);
651 }
652
653 if (opt_v && (stats.ks_partial_slabs > 0)) {
654 int i;
655 kmem_slab_usage_t *ksu;
656
657 mdb_printf(" %d complete (%d), %d partial:",
658 (stats.ks_slabs - stats.ks_partial_slabs),
659 stats.ks_max_buffers_per_slab,
660 stats.ks_partial_slabs);
661
662 for (i = 0; i < stats.ks_partial_slabs; i++) {
663 ksu = &stats.ks_usage[i];
664 mdb_printf(" %d%s", ksu->ksu_refcnt,
665 (ksu->ksu_nomove ? "*" : ""));
666 }
667 mdb_printf("\n\n");
668 }
669
670 if (stats.ks_usage_len > 0) {
671 mdb_free(stats.ks_usage,
672 stats.ks_usage_len * sizeof (kmem_slab_usage_t));
673 }
674
675 return (DCMD_OK);
676 }
677
678 void
679 kmem_slabs_help(void)
680 {
681 mdb_printf("%s",
682 "Display slab usage per kmem cache.\n\n");
683 mdb_dec_indent(2);
684 mdb_printf("%<b>OPTIONS%</b>\n");
685 mdb_inc_indent(2);
686 mdb_printf("%s",
687 " -n name\n"
688 " name of kmem cache (or matching partial name)\n"
689 " -N name\n"
690 " exact name of kmem cache\n"
691 " -b maxbins\n"
692 " Print a distribution of allocated buffers per slab using at\n"
693 " most maxbins bins. The first bin is reserved for completely\n"
694 " allocated slabs. Setting maxbins to zero (-b 0) has the same\n"
695 " effect as specifying the maximum allocated buffers per slab\n"
696 " or setting minbinsize to 1 (-B 1).\n"
697 " -B minbinsize\n"
698 " Print a distribution of allocated buffers per slab, making\n"
699 " all bins (except the first, reserved for completely allocated\n"
700 " slabs) at least minbinsize buffers apart.\n"
701 " -v verbose output: List the allocated buffer count of each partial\n"
702 " slab on the free list in order from front to back to show how\n"
703 " closely the slabs are ordered by usage. For example\n"
704 "\n"
705 " 10 complete, 3 partial (8): 7 3 1\n"
706 "\n"
707 " means there are thirteen slabs with eight buffers each, including\n"
708 " three partially allocated slabs with less than all eight buffers\n"
709 " allocated.\n"
710 "\n"
711 " Buffer allocations are always from the front of the partial slab\n"
712 " list. When a buffer is freed from a completely used slab, that\n"
713 " slab is added to the front of the partial slab list. Assuming\n"
714 " that all buffers are equally likely to be freed soon, the\n"
715 " desired order of partial slabs is most-used at the front of the\n"
716 " list and least-used at the back (as in the example above).\n"
717 " However, if a slab contains an allocated buffer that will not\n"
718 " soon be freed, it would be better for that slab to be at the\n"
719 " front where all of its buffers can be allocated. Taking a slab\n"
720 " off the partial slab list (either with all buffers freed or all\n"
721 " buffers allocated) reduces cache fragmentation.\n"
722 "\n"
723 " A slab's allocated buffer count representing a partial slab (9 in\n"
724 " the example below) may be marked as follows:\n"
725 "\n"
726 " 9* An asterisk indicates that kmem has marked the slab non-\n"
727 " reclaimable because the kmem client refused to move one of the\n"
728 " slab's buffers. Since kmem does not expect to completely free the\n"
729 " slab, it moves it to the front of the list in the hope of\n"
730 " completely allocating it instead. A slab marked with an asterisk\n"
731 " stays marked for as long as it remains on the partial slab list.\n"
732 "\n"
733 "Column\t\tDescription\n"
734 "\n"
735 "Cache Name\t\tname of kmem cache\n"
736 "Slabs\t\t\ttotal slab count\n"
737 "Partial Slabs\t\tcount of partially allocated slabs on the free list\n"
738 "Buffers\t\ttotal buffer count (Slabs * (buffers per slab))\n"
739 "Unused Buffers\tcount of unallocated buffers across all partial slabs\n"
740 "Waste\t\t\t(Unused Buffers / Buffers) does not include space\n"
741 "\t\t\t for accounting structures (debug mode), slab\n"
742 "\t\t\t coloring (incremental small offsets to stagger\n"
743 "\t\t\t buffer alignment), or the per-CPU magazine layer\n");
744 }
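/*
 * Typical invocations (cache name and address are hypothetical):
 *
 *	> ::kmem_slabs -n zio
 *	> fffffffc8000d008::kmem_slabs -v
 *
 * The first summarizes every cache whose name contains "zio"; the second
 * adds the per-partial-slab allocated-buffer counts described above.
 */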
745
746 static int
747 addrcmp(const void *lhs, const void *rhs)
748 {
749 uintptr_t p1 = *((uintptr_t *)lhs);
750 uintptr_t p2 = *((uintptr_t *)rhs);
751
752 if (p1 < p2)
753 return (-1);
754 if (p1 > p2)
755 return (1);
756 return (0);
757 }
758
759 static int
760 bufctlcmp(const kmem_bufctl_audit_t **lhs, const kmem_bufctl_audit_t **rhs)
761 {
762 const kmem_bufctl_audit_t *bcp1 = *lhs;
763 const kmem_bufctl_audit_t *bcp2 = *rhs;
764
765 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
766 return (-1);
767
768 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
769 return (1);
770
771 return (0);
772 }
773
774 typedef struct kmem_hash_walk {
775 uintptr_t *kmhw_table;
776 size_t kmhw_nelems;
777 size_t kmhw_pos;
778 kmem_bufctl_t kmhw_cur;
779 } kmem_hash_walk_t;
780
781 int
782 kmem_hash_walk_init(mdb_walk_state_t *wsp)
783 {
784 kmem_hash_walk_t *kmhw;
785 uintptr_t *hash;
786 kmem_cache_t c;
787 uintptr_t haddr, addr = wsp->walk_addr;
788 size_t nelems;
789 size_t hsize;
790
791 if (addr == NULL) {
792 mdb_warn("kmem_hash doesn't support global walks\n");
793 return (WALK_ERR);
794 }
795
796 if (mdb_vread(&c, sizeof (c), addr) == -1) {
797 mdb_warn("couldn't read cache at addr %p", addr);
798 return (WALK_ERR);
799 }
800
801 if (!(c.cache_flags & KMF_HASH)) {
802 mdb_warn("cache %p doesn't have a hash table\n", addr);
803 return (WALK_DONE); /* nothing to do */
804 }
805
806 kmhw = mdb_zalloc(sizeof (kmem_hash_walk_t), UM_SLEEP);
807 kmhw->kmhw_cur.bc_next = NULL;
808 kmhw->kmhw_pos = 0;
809
810 kmhw->kmhw_nelems = nelems = c.cache_hash_mask + 1;
811 hsize = nelems * sizeof (uintptr_t);
812 haddr = (uintptr_t)c.cache_hash_table;
813
814 kmhw->kmhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
815 if (mdb_vread(hash, hsize, haddr) == -1) {
816 mdb_warn("failed to read hash table at %p", haddr);
817 mdb_free(hash, hsize);
818 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
819 return (WALK_ERR);
820 }
821
822 wsp->walk_data = kmhw;
823
824 return (WALK_NEXT);
825 }
826
827 int
828 kmem_hash_walk_step(mdb_walk_state_t *wsp)
829 {
830 kmem_hash_walk_t *kmhw = wsp->walk_data;
831 uintptr_t addr = NULL;
832
833 if ((addr = (uintptr_t)kmhw->kmhw_cur.bc_next) == NULL) {
834 while (kmhw->kmhw_pos < kmhw->kmhw_nelems) {
835 if ((addr = kmhw->kmhw_table[kmhw->kmhw_pos++]) != NULL)
836 break;
837 }
838 }
839 if (addr == NULL)
840 return (WALK_DONE);
841
842 if (mdb_vread(&kmhw->kmhw_cur, sizeof (kmem_bufctl_t), addr) == -1) {
843 mdb_warn("couldn't read kmem_bufctl_t at addr %p", addr);
844 return (WALK_ERR);
845 }
846
847 return (wsp->walk_callback(addr, &kmhw->kmhw_cur, wsp->walk_cbdata));
848 }
849
850 void
851 kmem_hash_walk_fini(mdb_walk_state_t *wsp)
852 {
853 kmem_hash_walk_t *kmhw = wsp->walk_data;
854
855 if (kmhw == NULL)
856 return;
857
858 mdb_free(kmhw->kmhw_table, kmhw->kmhw_nelems * sizeof (uintptr_t));
859 mdb_free(kmhw, sizeof (kmem_hash_walk_t));
860 }
861
862 /*
863 * Find the address of the bufctl structure for the address 'buf' in cache
864 * 'cp', which is at address caddr, and place it in *out.
865 */
866 static int
867 kmem_hash_lookup(kmem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
868 {
869 uintptr_t bucket = (uintptr_t)KMEM_HASH(cp, buf);
870 kmem_bufctl_t *bcp;
871 kmem_bufctl_t bc;
872
873 if (mdb_vread(&bcp, sizeof (kmem_bufctl_t *), bucket) == -1) {
874 mdb_warn("unable to read hash bucket for %p in cache %p",
875 buf, caddr);
876 return (-1);
877 }
878
879 while (bcp != NULL) {
880 if (mdb_vread(&bc, sizeof (kmem_bufctl_t),
881 (uintptr_t)bcp) == -1) {
882 mdb_warn("unable to read bufctl at %p", bcp);
883 return (-1);
884 }
885 if (bc.bc_addr == buf) {
886 *out = (uintptr_t)bcp;
887 return (0);
888 }
889 bcp = bc.bc_next;
890 }
891
892 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
893 return (-1);
894 }
895
896 int
897 kmem_get_magsize(const kmem_cache_t *cp)
898 {
899 uintptr_t addr = (uintptr_t)cp->cache_magtype;
900 GElf_Sym mt_sym;
901 kmem_magtype_t mt;
902 int res;
903
904 /*
905 * if cpu 0 has a non-zero magsize, it must be correct. caches
906 * with KMF_NOMAGAZINE have disabled their magazine layers, so
907 * it is okay to return 0 for them.
908 */
909 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
910 (cp->cache_flags & KMF_NOMAGAZINE))
911 return (res);
912
913 if (mdb_lookup_by_name("kmem_magtype", &mt_sym) == -1) {
914 mdb_warn("unable to read 'kmem_magtype'");
915 } else if (addr < mt_sym.st_value ||
916 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
917 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
918 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
919 cp->cache_name, addr);
920 return (0);
921 }
922 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
923 mdb_warn("unable to read magtype at %a", addr);
924 return (0);
925 }
926 return (mt.mt_magsize);
927 }
928
929 /*ARGSUSED*/
930 static int
931 kmem_estimate_slab(uintptr_t addr, const kmem_slab_t *sp, size_t *est)
932 {
933 *est -= (sp->slab_chunks - sp->slab_refcnt);
934
935 return (WALK_NEXT);
936 }
937
938 /*
939 * Returns an upper bound on the number of allocated buffers in a given
940 * cache.
941 */
942 size_t
943 kmem_estimate_allocated(uintptr_t addr, const kmem_cache_t *cp)
944 {
945 int magsize;
946 size_t cache_est;
947
948 cache_est = cp->cache_buftotal;
949
950 (void) mdb_pwalk("kmem_slab_partial",
951 (mdb_walk_cb_t)kmem_estimate_slab, &cache_est, addr);
952
953 if ((magsize = kmem_get_magsize(cp)) != 0) {
954 size_t mag_est = cp->cache_full.ml_total * magsize;
955
956 if (cache_est >= mag_est) {
957 cache_est -= mag_est;
958 } else {
959 mdb_warn("cache %p's magazine layer holds more buffers "
960 "than the slab layer.\n", addr);
961 }
962 }
963 return (cache_est);
964 }
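/*
 * A worked (hypothetical) example of the estimate: a cache with
 * cache_buftotal = 500, 40 free buffers across its partial slabs, 3 full
 * magazines in the depot, and a magazine size of 15 gives an upper bound
 * of 500 - 40 - (3 * 15) = 415 allocated buffers.
 */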
965
966 #define READMAG_ROUNDS(rounds) { \
967 if (mdb_vread(mp, magbsize, (uintptr_t)kmp) == -1) { \
968 mdb_warn("couldn't read magazine at %p", kmp); \
969 goto fail; \
970 } \
971 for (i = 0; i < rounds; i++) { \
972 maglist[magcnt++] = mp->mag_round[i]; \
973 if (magcnt == magmax) { \
974 mdb_warn("%d magazines exceeds fudge factor\n", \
975 magcnt); \
976 goto fail; \
977 } \
978 } \
979 }
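/*
 * READMAG_ROUNDS() is not hygienic: it expects kmp, mp, magbsize, maglist,
 * magcnt, magmax, and i to be in scope, and a local "fail:" label to jump
 * to, exactly as in kmem_read_magazines() below.  Typical use:
 *
 *	READMAG_ROUNDS(magsize);
 */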
980
981 int
982 kmem_read_magazines(kmem_cache_t *cp, uintptr_t addr, int ncpus,
983 void ***maglistp, size_t *magcntp, size_t *magmaxp, int alloc_flags)
984 {
985 kmem_magazine_t *kmp, *mp;
986 void **maglist = NULL;
987 int i, cpu;
988 size_t magsize, magmax, magbsize;
989 size_t magcnt = 0;
990
991 /*
992 * Read the magtype out of the cache, after verifying the pointer's
993 * correctness.
994 */
995 magsize = kmem_get_magsize(cp);
996 if (magsize == 0) {
997 *maglistp = NULL;
998 *magcntp = 0;
999 *magmaxp = 0;
1000 return (WALK_NEXT);
1001 }
1002
1003 /*
1004 * There are several places where we need to go buffer hunting:
1005 * the per-CPU loaded magazine, the per-CPU spare full magazine,
1006 * and the full magazine list in the depot.
1007 *
1008 * For an upper bound on the number of buffers in the magazine
1009 * layer, we have the number of magazines on the cache_full
1010 * list plus at most two magazines per CPU (the loaded and the
1011 * spare). Toss in 100 magazines as a fudge factor in case this
1012 * is live (the number "100" comes from the same fudge factor in
1013 * crash(1M)).
1014 */
1015 magmax = (cp->cache_full.ml_total + 2 * ncpus + 100) * magsize;
1016 magbsize = offsetof(kmem_magazine_t, mag_round[magsize]);
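/*
 * Illustrative sizing (hypothetical values): with ml_total = 10 full
 * magazines in the depot, ncpus = 4, and magsize = 15 rounds, magmax is
 * (10 + 2 * 4 + 100) * 15 = 1770 buffer pointers.
 */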
1017
1018 if (magbsize >= PAGESIZE / 2) {
1019 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
1020 addr, magbsize);
1021 return (WALK_ERR);
1022 }
1023
1024 maglist = mdb_alloc(magmax * sizeof (void *), alloc_flags);
1025 mp = mdb_alloc(magbsize, alloc_flags);
1026 if (mp == NULL || maglist == NULL)
1027 goto fail;
1028
1029 /*
1030 * First up: the magazines in the depot (i.e. on the cache_full list).
1031 */
1032 for (kmp = cp->cache_full.ml_list; kmp != NULL; ) {
1033 READMAG_ROUNDS(magsize);
1034 kmp = mp->mag_next;
1035
1036 if (kmp == cp->cache_full.ml_list)
1037 break; /* cache_full list loop detected */
1038 }
1039
1040 dprintf(("cache_full list done\n"));
1041
1042 /*
1043 * Now whip through the CPUs, snagging the loaded magazines
1044 * and full spares.
1045 *
1046 * In order to prevent inconsistent dumps, rounds and prounds
1047 * are copied aside before dumping begins.
1048 */
1049 for (cpu = 0; cpu < ncpus; cpu++) {
1050 kmem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
1051 short rounds, prounds;
1052
1053 if (KMEM_DUMPCC(ccp)) {
1054 rounds = ccp->cc_dump_rounds;
1055 prounds = ccp->cc_dump_prounds;
1056 } else {
1057 rounds = ccp->cc_rounds;
1058 prounds = ccp->cc_prounds;
1059 }
1060
1061 dprintf(("reading cpu cache %p\n",
1062 (uintptr_t)ccp - (uintptr_t)cp + addr));
1063
1064 if (rounds > 0 &&
1065 (kmp = ccp->cc_loaded) != NULL) {
1066 dprintf(("reading %d loaded rounds\n", rounds));
1067 READMAG_ROUNDS(rounds);
1068 }
1069
1070 if (prounds > 0 &&
1071 (kmp = ccp->cc_ploaded) != NULL) {
1072 dprintf(("reading %d previously loaded rounds\n",
1073 prounds));
1074 READMAG_ROUNDS(prounds);
1075 }
1076 }
1077
1078 dprintf(("magazine layer: %d buffers\n", magcnt));
1079
1080 if (!(alloc_flags & UM_GC))
1081 mdb_free(mp, magbsize);
1082
1083 *maglistp = maglist;
1084 *magcntp = magcnt;
1085 *magmaxp = magmax;
1086
1087 return (WALK_NEXT);
1088
1089 fail:
1090 if (!(alloc_flags & UM_GC)) {
1091 if (mp)
1092 mdb_free(mp, magbsize);
1093 if (maglist)
1094 mdb_free(maglist, magmax * sizeof (void *));
1095 }
1096 return (WALK_ERR);
1097 }
1098
1099 static int
1100 kmem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1101 {
1102 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1103 }
1104
1105 static int
1106 bufctl_walk_callback(kmem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1107 {
1108 kmem_bufctl_audit_t b;
1109
1110 /*
1111 * if KMF_AUDIT is not set, we know that we're looking at a
1112 * kmem_bufctl_t.
1113 */
1114 if (!(cp->cache_flags & KMF_AUDIT) ||
1115 mdb_vread(&b, sizeof (kmem_bufctl_audit_t), buf) == -1) {
1116 (void) memset(&b, 0, sizeof (b));
1117 if (mdb_vread(&b, sizeof (kmem_bufctl_t), buf) == -1) {
1118 mdb_warn("unable to read bufctl at %p", buf);
1119 return (WALK_ERR);
1120 }
1121 }
1122
1123 return (wsp->walk_callback(buf, &b, wsp->walk_cbdata));
1124 }
1125
1126 typedef struct kmem_walk {
1127 int kmw_type;
1128
1129 uintptr_t kmw_addr; /* cache address */
1130 kmem_cache_t *kmw_cp;
1131 size_t kmw_csize;
1132
1133 /*
1134 * magazine layer
1135 */
1136 void **kmw_maglist;
1137 size_t kmw_max;
1138 size_t kmw_count;
1139 size_t kmw_pos;
1140
1141 /*
1142 * slab layer
1143 */
1144 char *kmw_valid; /* to keep track of freed buffers */
1145 char *kmw_ubase; /* buffer for slab data */
1146 } kmem_walk_t;
1147
1148 static int
1149 kmem_walk_init_common(mdb_walk_state_t *wsp, int type)
1150 {
1151 kmem_walk_t *kmw;
1152 int ncpus, csize;
1153 kmem_cache_t *cp;
1154 size_t vm_quantum;
1155
1156 size_t magmax, magcnt;
1157 void **maglist = NULL;
1158 uint_t chunksize, slabsize;
1159 int status = WALK_ERR;
1160 uintptr_t addr = wsp->walk_addr;
1161 const char *layered;
1162
1163 type &= ~KM_HASH;
1164
1165 if (addr == NULL) {
1166 mdb_warn("kmem walk doesn't support global walks\n");
1167 return (WALK_ERR);
1168 }
1169
1170 dprintf(("walking %p\n", addr));
1171
1172 /*
1173 * First we need to figure out how many CPUs are configured in the
1174 * system to know how much to slurp out.
1175 */
1176 mdb_readvar(&ncpus, "max_ncpus");
1177
1178 csize = KMEM_CACHE_SIZE(ncpus);
1179 cp = mdb_alloc(csize, UM_SLEEP);
1180
1181 if (mdb_vread(cp, csize, addr) == -1) {
1182 mdb_warn("couldn't read cache at addr %p", addr);
1183 goto out2;
1184 }
1185
1186 /*
1187 * It's easy for someone to hand us an invalid cache address.
1188 * Unfortunately, it is hard for this walker to survive an
1189 * invalid cache cleanly. So we make sure that:
1190 *
1191 * 1. the vmem arena for the cache is readable,
1192 * 2. the vmem arena's quantum is a power of 2,
1193 * 3. our slabsize is a multiple of the quantum, and
1194 * 4. our chunksize is >0 and less than our slabsize.
1195 */
1196 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1197 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1198 vm_quantum == 0 ||
1199 (vm_quantum & (vm_quantum - 1)) != 0 ||
1200 cp->cache_slabsize < vm_quantum ||
1201 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1202 cp->cache_chunksize == 0 ||
1203 cp->cache_chunksize > cp->cache_slabsize) {
1204 mdb_warn("%p is not a valid kmem_cache_t\n", addr);
1205 goto out2;
1206 }
1207
1208 dprintf(("buf total is %d\n", cp->cache_buftotal));
1209
1210 if (cp->cache_buftotal == 0) {
1211 mdb_free(cp, csize);
1212 return (WALK_DONE);
1213 }
1214
1215 /*
1216 * If they ask for bufctls, but it's a small-slab cache,
1217 * there is nothing to report.
1218 */
1219 if ((type & KM_BUFCTL) && !(cp->cache_flags & KMF_HASH)) {
1220 dprintf(("bufctl requested, not KMF_HASH (flags: %p)\n",
1221 cp->cache_flags));
1222 mdb_free(cp, csize);
1223 return (WALK_DONE);
1224 }
1225
1226 /*
1227 * If they want constructed buffers, but there's no constructor or
1228 * the cache has DEADBEEF checking enabled, there is nothing to report.
1229 */
1230 if ((type & KM_CONSTRUCTED) && (!(type & KM_FREE) ||
1231 cp->cache_constructor == NULL ||
1232 (cp->cache_flags & (KMF_DEADBEEF | KMF_LITE)) == KMF_DEADBEEF)) {
1233 mdb_free(cp, csize);
1234 return (WALK_DONE);
1235 }
1236
1237 /*
1238 * Read in the contents of the magazine layer
1239 */
1240 if (kmem_read_magazines(cp, addr, ncpus, &maglist, &magcnt,
1241 &magmax, UM_SLEEP) == WALK_ERR)
1242 goto out2;
1243
1244 /*
1245 * We have all of the buffers from the magazines; if we are walking
1246 * allocated buffers, sort them so we can bsearch them later.
1247 */
1248 if (type & KM_ALLOCATED)
1249 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1250
1251 wsp->walk_data = kmw = mdb_zalloc(sizeof (kmem_walk_t), UM_SLEEP);
1252
1253 kmw->kmw_type = type;
1254 kmw->kmw_addr = addr;
1255 kmw->kmw_cp = cp;
1256 kmw->kmw_csize = csize;
1257 kmw->kmw_maglist = maglist;
1258 kmw->kmw_max = magmax;
1259 kmw->kmw_count = magcnt;
1260 kmw->kmw_pos = 0;
1261
1262 /*
1263 * When walking allocated buffers in a KMF_HASH cache, we walk the
1264 * hash table instead of the slab layer.
1265 */
1266 if ((cp->cache_flags & KMF_HASH) && (type & KM_ALLOCATED)) {
1267 layered = "kmem_hash";
1268
1269 kmw->kmw_type |= KM_HASH;
1270 } else {
1271 /*
1272 * If we are walking freed buffers, we only need the
1273 * magazine layer plus the partially allocated slabs.
1274 * To walk allocated buffers, we need all of the slabs.
1275 */
1276 if (type & KM_ALLOCATED)
1277 layered = "kmem_slab";
1278 else
1279 layered = "kmem_slab_partial";
1280
1281 /*
1282 * for small-slab caches, we read in the entire slab. For
1283 * freed buffers, we can just walk the freelist. For
1284 * allocated buffers, we use a 'valid' array to track
1285 * the freed buffers.
1286 */
1287 if (!(cp->cache_flags & KMF_HASH)) {
1288 chunksize = cp->cache_chunksize;
1289 slabsize = cp->cache_slabsize;
1290
1291 kmw->kmw_ubase = mdb_alloc(slabsize +
1292 sizeof (kmem_bufctl_t), UM_SLEEP);
1293
1294 if (type & KM_ALLOCATED)
1295 kmw->kmw_valid =
1296 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1297 }
1298 }
1299
1300 status = WALK_NEXT;
1301
1302 if (mdb_layered_walk(layered, wsp) == -1) {
1303 mdb_warn("unable to start layered '%s' walk", layered);
1304 status = WALK_ERR;
1305 }
1306
1307 out1:
1308 if (status == WALK_ERR) {
1309 if (kmw->kmw_valid)
1310 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1311
1312 if (kmw->kmw_ubase)
1313 mdb_free(kmw->kmw_ubase, slabsize +
1314 sizeof (kmem_bufctl_t));
1315
1316 if (kmw->kmw_maglist)
1317 mdb_free(kmw->kmw_maglist,
1318 kmw->kmw_max * sizeof (uintptr_t));
1319
1320 mdb_free(kmw, sizeof (kmem_walk_t));
1321 wsp->walk_data = NULL;
1322 }
1323
1324 out2:
1325 if (status == WALK_ERR)
1326 mdb_free(cp, csize);
1327
1328 return (status);
1329 }
1330
1331 int
1332 kmem_walk_step(mdb_walk_state_t *wsp)
1333 {
1334 kmem_walk_t *kmw = wsp->walk_data;
1335 int type = kmw->kmw_type;
1336 kmem_cache_t *cp = kmw->kmw_cp;
1337
1338 void **maglist = kmw->kmw_maglist;
1339 int magcnt = kmw->kmw_count;
1340
1341 uintptr_t chunksize, slabsize;
1342 uintptr_t addr;
1343 const kmem_slab_t *sp;
1344 const kmem_bufctl_t *bcp;
1345 kmem_bufctl_t bc;
1346
1347 int chunks;
1348 char *kbase;
1349 void *buf;
1350 int i, ret;
1351
1352 char *valid, *ubase;
1353
1354 /*
1355 * first, handle the 'kmem_hash' layered walk case
1356 */
1357 if (type & KM_HASH) {
1358 /*
1359 * We have a buffer which has been allocated out of the
1360 * global layer. We need to make sure that it's not
1361 * actually sitting in a magazine before we report it as
1362 * an allocated buffer.
1363 */
1364 buf = ((const kmem_bufctl_t *)wsp->walk_layer)->bc_addr;
1365
1366 if (magcnt > 0 &&
1367 bsearch(&buf, maglist, magcnt, sizeof (void *),
1368 addrcmp) != NULL)
1369 return (WALK_NEXT);
1370
1371 if (type & KM_BUFCTL)
1372 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1373
1374 return (kmem_walk_callback(wsp, (uintptr_t)buf));
1375 }
1376
1377 ret = WALK_NEXT;
1378
1379 addr = kmw->kmw_addr;
1380
1381 /*
1382 * If we're walking freed buffers, report everything in the
1383 * magazine layer before processing the first slab.
1384 */
1385 if ((type & KM_FREE) && magcnt != 0) {
1386 kmw->kmw_count = 0; /* only do this once */
1387 for (i = 0; i < magcnt; i++) {
1388 buf = maglist[i];
1389
1390 if (type & KM_BUFCTL) {
1391 uintptr_t out;
1392
1393 if (cp->cache_flags & KMF_BUFTAG) {
1394 kmem_buftag_t *btp;
1395 kmem_buftag_t tag;
1396
1397 /* LINTED - alignment */
1398 btp = KMEM_BUFTAG(cp, buf);
1399 if (mdb_vread(&tag, sizeof (tag),
1400 (uintptr_t)btp) == -1) {
1401 mdb_warn("reading buftag for "
1402 "%p at %p", buf, btp);
1403 continue;
1404 }
1405 out = (uintptr_t)tag.bt_bufctl;
1406 } else {
1407 if (kmem_hash_lookup(cp, addr, buf,
1408 &out) == -1)
1409 continue;
1410 }
1411 ret = bufctl_walk_callback(cp, wsp, out);
1412 } else {
1413 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1414 }
1415
1416 if (ret != WALK_NEXT)
1417 return (ret);
1418 }
1419 }
1420
1421 /*
1422 * If they want constructed buffers, we're finished, since the
1423 * magazine layer holds them all.
1424 */
1425 if (type & KM_CONSTRUCTED)
1426 return (WALK_DONE);
1427
1428 /*
1429 * Handle the buffers in the current slab
1430 */
1431 chunksize = cp->cache_chunksize;
1432 slabsize = cp->cache_slabsize;
1433
1434 sp = wsp->walk_layer;
1435 chunks = sp->slab_chunks;
1436 kbase = sp->slab_base;
1437
1438 dprintf(("kbase is %p\n", kbase));
1439
1440 if (!(cp->cache_flags & KMF_HASH)) {
1441 valid = kmw->kmw_valid;
1442 ubase = kmw->kmw_ubase;
1443
1444 if (mdb_vread(ubase, chunks * chunksize,
1445 (uintptr_t)kbase) == -1) {
1446 mdb_warn("failed to read slab contents at %p", kbase);
1447 return (WALK_ERR);
1448 }
1449
1450 /*
1451 * Set up the valid map as fully allocated -- we'll punch
1452 * out the freelist.
1453 */
1454 if (type & KM_ALLOCATED)
1455 (void) memset(valid, 1, chunks);
1456 } else {
1457 valid = NULL;
1458 ubase = NULL;
1459 }
1460
1461 /*
1462 * walk the slab's freelist
1463 */
1464 bcp = sp->slab_head;
1465
1466 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1467
1468 /*
1469 * since we could be in the middle of allocating a buffer,
1470 * our refcnt could be one higher than it ought to be. So we
1471 * check one further on the freelist than the count allows.
1472 */
1473 for (i = sp->slab_refcnt; i <= chunks; i++) {
1474 uint_t ndx;
1475
1476 dprintf(("bcp is %p\n", bcp));
1477
1478 if (bcp == NULL) {
1479 if (i == chunks)
1480 break;
1481 mdb_warn(
1482 "slab %p in cache %p freelist too short by %d\n",
1483 sp, addr, chunks - i);
1484 break;
1485 }
1486
1487 if (cp->cache_flags & KMF_HASH) {
1488 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1489 mdb_warn("failed to read bufctl ptr at %p",
1490 bcp);
1491 break;
1492 }
1493 buf = bc.bc_addr;
1494 } else {
1495 /*
1496 * Otherwise the buffer is (or should be) in the slab
1497 * that we've read in; determine its offset in the
1498 * slab, validate that it's not corrupt, and add to
1499 * our base address to find the kmem_bufctl_t. (Note
1500 * that we don't need to add the size of the bufctl
1501 * to our offset calculation because of the slop that's
1502 * allocated for the buffer at ubase.)
1503 */
1504 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1505
1506 if (offs > chunks * chunksize) {
1507 mdb_warn("found corrupt bufctl ptr %p"
1508 " in slab %p in cache %p\n", bcp,
1509 wsp->walk_addr, addr);
1510 break;
1511 }
1512
1513 bc = *((kmem_bufctl_t *)((uintptr_t)ubase + offs));
1514 buf = KMEM_BUF(cp, bcp);
1515 }
1516
1517 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1518
1519 if (ndx > slabsize / cp->cache_bufsize) {
1520 /*
1521 * This is very wrong; we have managed to find
1522 * a buffer in the slab which shouldn't
1523 * actually be here. Emit a warning, and
1524 * try to continue.
1525 */
1526 mdb_warn("buf %p is out of range for "
1527 "slab %p, cache %p\n", buf, sp, addr);
1528 } else if (type & KM_ALLOCATED) {
1529 /*
1530 * we have found a buffer on the slab's freelist;
1531 * clear its entry
1532 */
1533 valid[ndx] = 0;
1534 } else {
1535 /*
1536 * Report this freed buffer
1537 */
1538 if (type & KM_BUFCTL) {
1539 ret = bufctl_walk_callback(cp, wsp,
1540 (uintptr_t)bcp);
1541 } else {
1542 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1543 }
1544 if (ret != WALK_NEXT)
1545 return (ret);
1546 }
1547
1548 bcp = bc.bc_next;
1549 }
1550
1551 if (bcp != NULL) {
1552 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1553 sp, addr, bcp));
1554 }
1555
1556 /*
1557 * If we are walking freed buffers, the loop above handled reporting
1558 * them.
1559 */
1560 if (type & KM_FREE)
1561 return (WALK_NEXT);
1562
1563 if (type & KM_BUFCTL) {
1564 mdb_warn("impossible situation: small-slab KM_BUFCTL walk for "
1565 "cache %p\n", addr);
1566 return (WALK_ERR);
1567 }
1568
1569 /*
1570 * Report allocated buffers, skipping buffers in the magazine layer.
1571 * We only get this far for small-slab caches.
1572 */
1573 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1574 buf = (char *)kbase + i * chunksize;
1575
1576 if (!valid[i])
1577 continue; /* on slab freelist */
1578
1579 if (magcnt > 0 &&
1580 bsearch(&buf, maglist, magcnt, sizeof (void *),
1581 addrcmp) != NULL)
1582 continue; /* in magazine layer */
1583
1584 ret = kmem_walk_callback(wsp, (uintptr_t)buf);
1585 }
1586 return (ret);
1587 }
1588
1589 void
1590 kmem_walk_fini(mdb_walk_state_t *wsp)
1591 {
1592 kmem_walk_t *kmw = wsp->walk_data;
1593 uintptr_t chunksize;
1594 uintptr_t slabsize;
1595
1596 if (kmw == NULL)
1597 return;
1598
1599 if (kmw->kmw_maglist != NULL)
1600 mdb_free(kmw->kmw_maglist, kmw->kmw_max * sizeof (void *));
1601
1602 chunksize = kmw->kmw_cp->cache_chunksize;
1603 slabsize = kmw->kmw_cp->cache_slabsize;
1604
1605 if (kmw->kmw_valid != NULL)
1606 mdb_free(kmw->kmw_valid, slabsize / chunksize);
1607 if (kmw->kmw_ubase != NULL)
1608 mdb_free(kmw->kmw_ubase, slabsize + sizeof (kmem_bufctl_t));
1609
1610 mdb_free(kmw->kmw_cp, kmw->kmw_csize);
1611 mdb_free(kmw, sizeof (kmem_walk_t));
1612 }
1613
1614 /*ARGSUSED*/
1615 static int
1616 kmem_walk_all(uintptr_t addr, const kmem_cache_t *c, mdb_walk_state_t *wsp)
1617 {
1618 /*
1619 * Buffers allocated from NOTOUCH caches can also show up as freed
1620 * memory in other caches. This can be a little confusing, so we
1621 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1622 * that "::walk kmem" and "::walk freemem" yield disjoint output).
1623 */
1624 if (c->cache_cflags & KMC_NOTOUCH)
1625 return (WALK_NEXT);
1626
1627 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1628 wsp->walk_cbdata, addr) == -1)
1629 return (WALK_DONE);
1630
1631 return (WALK_NEXT);
1632 }
1633
1634 #define KMEM_WALK_ALL(name, wsp) { \
1635 wsp->walk_data = (name); \
1636 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_walk_all, wsp) == -1) \
1637 return (WALK_ERR); \
1638 return (WALK_DONE); \
1639 }
1640
1641 int
1642 kmem_walk_init(mdb_walk_state_t *wsp)
1643 {
1644 if (wsp->walk_arg != NULL)
1645 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1646
1647 if (wsp->walk_addr == NULL)
1648 KMEM_WALK_ALL("kmem", wsp);
1649 return (kmem_walk_init_common(wsp, KM_ALLOCATED));
1650 }
1651
1652 int
1653 bufctl_walk_init(mdb_walk_state_t *wsp)
1654 {
1655 if (wsp->walk_addr == NULL)
1656 KMEM_WALK_ALL("bufctl", wsp);
1657 return (kmem_walk_init_common(wsp, KM_ALLOCATED | KM_BUFCTL));
1658 }
1659
1660 int
1661 freemem_walk_init(mdb_walk_state_t *wsp)
1662 {
1663 if (wsp->walk_addr == NULL)
1664 KMEM_WALK_ALL("freemem", wsp);
1665 return (kmem_walk_init_common(wsp, KM_FREE));
1666 }
1667
1668 int
1669 freemem_constructed_walk_init(mdb_walk_state_t *wsp)
1670 {
1671 if (wsp->walk_addr == NULL)
1672 KMEM_WALK_ALL("freemem_constructed", wsp);
1673 return (kmem_walk_init_common(wsp, KM_FREE | KM_CONSTRUCTED));
1674 }
1675
1676 int
1677 freectl_walk_init(mdb_walk_state_t *wsp)
1678 {
1679 if (wsp->walk_addr == NULL)
1680 KMEM_WALK_ALL("freectl", wsp);
1681 return (kmem_walk_init_common(wsp, KM_FREE | KM_BUFCTL));
1682 }
1683
1684 int
1685 freectl_constructed_walk_init(mdb_walk_state_t *wsp)
1686 {
1687 if (wsp->walk_addr == NULL)
1688 KMEM_WALK_ALL("freectl_constructed", wsp);
1689 return (kmem_walk_init_common(wsp,
1690 KM_FREE | KM_BUFCTL | KM_CONSTRUCTED));
1691 }
1692
1693 typedef struct bufctl_history_walk {
1694 void *bhw_next;
1695 kmem_cache_t *bhw_cache;
1696 kmem_slab_t *bhw_slab;
1697 hrtime_t bhw_timestamp;
1698 } bufctl_history_walk_t;
1699
1700 int
1701 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1702 {
1703 bufctl_history_walk_t *bhw;
1704 kmem_bufctl_audit_t bc;
1705 kmem_bufctl_audit_t bcn;
1706
1707 if (wsp->walk_addr == NULL) {
1708 mdb_warn("bufctl_history walk doesn't support global walks\n");
1709 return (WALK_ERR);
1710 }
1711
1712 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1713 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1714 return (WALK_ERR);
1715 }
1716
1717 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1718 bhw->bhw_timestamp = 0;
1719 bhw->bhw_cache = bc.bc_cache;
1720 bhw->bhw_slab = bc.bc_slab;
1721
1722 /*
1723 * sometimes the first log entry matches the base bufctl; in that
1724 * case, skip the base bufctl.
1725 */
1726 if (bc.bc_lastlog != NULL &&
1727 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1728 bc.bc_addr == bcn.bc_addr &&
1729 bc.bc_cache == bcn.bc_cache &&
1730 bc.bc_slab == bcn.bc_slab &&
1731 bc.bc_timestamp == bcn.bc_timestamp &&
1732 bc.bc_thread == bcn.bc_thread)
1733 bhw->bhw_next = bc.bc_lastlog;
1734 else
1735 bhw->bhw_next = (void *)wsp->walk_addr;
1736
1737 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1738 wsp->walk_data = bhw;
1739
1740 return (WALK_NEXT);
1741 }
1742
1743 int
1744 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1745 {
1746 bufctl_history_walk_t *bhw = wsp->walk_data;
1747 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1748 uintptr_t baseaddr = wsp->walk_addr;
1749 kmem_bufctl_audit_t bc;
1750
1751 if (addr == NULL)
1752 return (WALK_DONE);
1753
1754 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
1755 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1756 return (WALK_ERR);
1757 }
1758
1759 /*
1760 * The bufctl is only valid if the address, cache, and slab are
1761 * correct. We also check that the timestamp is decreasing, to
1762 * prevent infinite loops.
1763 */
1764 if ((uintptr_t)bc.bc_addr != baseaddr ||
1765 bc.bc_cache != bhw->bhw_cache ||
1766 bc.bc_slab != bhw->bhw_slab ||
1767 (bhw->bhw_timestamp != 0 && bc.bc_timestamp >= bhw->bhw_timestamp))
1768 return (WALK_DONE);
1769
1770 bhw->bhw_next = bc.bc_lastlog;
1771 bhw->bhw_timestamp = bc.bc_timestamp;
1772
1773 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
1774 }
1775
1776 void
1777 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1778 {
1779 bufctl_history_walk_t *bhw = wsp->walk_data;
1780
1781 mdb_free(bhw, sizeof (*bhw));
1782 }
1783
1784 typedef struct kmem_log_walk {
1785 kmem_bufctl_audit_t *klw_base;
1786 kmem_bufctl_audit_t **klw_sorted;
1787 kmem_log_header_t klw_lh;
1788 size_t klw_size;
1789 size_t klw_maxndx;
1790 size_t klw_ndx;
1791 } kmem_log_walk_t;
1792
1793 int
1794 kmem_log_walk_init(mdb_walk_state_t *wsp)
1795 {
1796 uintptr_t lp = wsp->walk_addr;
1797 kmem_log_walk_t *klw;
1798 kmem_log_header_t *lhp;
1799 int maxndx, i, j, k;
1800
1801 /*
1802 * By default (global walk), walk the kmem_transaction_log. Otherwise
1803 * read the log whose kmem_log_header_t is stored at walk_addr.
1804 */
1805 if (lp == NULL && mdb_readvar(&lp, "kmem_transaction_log") == -1) {
1806 mdb_warn("failed to read 'kmem_transaction_log'");
1807 return (WALK_ERR);
1808 }
1809
1810 if (lp == NULL) {
1811 mdb_warn("log is disabled\n");
1812 return (WALK_ERR);
1813 }
1814
1815 klw = mdb_zalloc(sizeof (kmem_log_walk_t), UM_SLEEP);
1816 lhp = &klw->klw_lh;
1817
1818 if (mdb_vread(lhp, sizeof (kmem_log_header_t), lp) == -1) {
1819 mdb_warn("failed to read log header at %p", lp);
1820 mdb_free(klw, sizeof (kmem_log_walk_t));
1821 return (WALK_ERR);
1822 }
1823
1824 klw->klw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1825 klw->klw_base = mdb_alloc(klw->klw_size, UM_SLEEP);
1826 maxndx = lhp->lh_chunksize / sizeof (kmem_bufctl_audit_t) - 1;
1827
1828 if (mdb_vread(klw->klw_base, klw->klw_size,
1829 (uintptr_t)lhp->lh_base) == -1) {
1830 mdb_warn("failed to read log at base %p", lhp->lh_base);
1831 mdb_free(klw->klw_base, klw->klw_size);
1832 mdb_free(klw, sizeof (kmem_log_walk_t));
1833 return (WALK_ERR);
1834 }
1835
1836 klw->klw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1837 sizeof (kmem_bufctl_audit_t *), UM_SLEEP);
1838
1839 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1840 kmem_bufctl_audit_t *chunk = (kmem_bufctl_audit_t *)
1841 ((uintptr_t)klw->klw_base + i * lhp->lh_chunksize);
1842
1843 for (j = 0; j < maxndx; j++)
1844 klw->klw_sorted[k++] = &chunk[j];
1845 }
1846
1847 qsort(klw->klw_sorted, k, sizeof (kmem_bufctl_audit_t *),
1848 (int(*)(const void *, const void *))bufctlcmp);
1849
1850 klw->klw_maxndx = k;
1851 wsp->walk_data = klw;
1852
1853 return (WALK_NEXT);
1854 }
1855
1856 int
1857 kmem_log_walk_step(mdb_walk_state_t *wsp)
1858 {
1859 kmem_log_walk_t *klw = wsp->walk_data;
1860 kmem_bufctl_audit_t *bcp;
1861
1862 if (klw->klw_ndx == klw->klw_maxndx)
1863 return (WALK_DONE);
1864
1865 bcp = klw->klw_sorted[klw->klw_ndx++];
1866
1867 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)klw->klw_base +
1868 (uintptr_t)klw->klw_lh.lh_base, bcp, wsp->walk_cbdata));
1869 }
1870
1871 void
1872 kmem_log_walk_fini(mdb_walk_state_t *wsp)
1873 {
1874 kmem_log_walk_t *klw = wsp->walk_data;
1875
1876 mdb_free(klw->klw_base, klw->klw_size);
1877 mdb_free(klw->klw_sorted, klw->klw_maxndx *
1878 sizeof (kmem_bufctl_audit_t *));
1879 mdb_free(klw, sizeof (kmem_log_walk_t));
1880 }
1881
1882 typedef struct allocdby_bufctl {
1883 uintptr_t abb_addr;
1884 hrtime_t abb_ts;
1885 } allocdby_bufctl_t;
1886
1887 typedef struct allocdby_walk {
1888 const char *abw_walk;
1889 uintptr_t abw_thread;
1890 size_t abw_nbufs;
1891 size_t abw_size;
1892 allocdby_bufctl_t *abw_buf;
1893 size_t abw_ndx;
1894 } allocdby_walk_t;
1895
1896 int
1897 allocdby_walk_bufctl(uintptr_t addr, const kmem_bufctl_audit_t *bcp,
1898 allocdby_walk_t *abw)
1899 {
1900 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1901 return (WALK_NEXT);
1902
1903 if (abw->abw_nbufs == abw->abw_size) {
1904 allocdby_bufctl_t *buf;
1905 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1906
1907 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1908
1909 bcopy(abw->abw_buf, buf, oldsize);
1910 mdb_free(abw->abw_buf, oldsize);
1911
1912 abw->abw_size <<= 1;
1913 abw->abw_buf = buf;
1914 }
1915
1916 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1917 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1918 abw->abw_nbufs++;
1919
1920 return (WALK_NEXT);
1921 }
1922
1923 /*ARGSUSED*/
1924 int
1925 allocdby_walk_cache(uintptr_t addr, const kmem_cache_t *c, allocdby_walk_t *abw)
1926 {
1927 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1928 abw, addr) == -1) {
1929 mdb_warn("couldn't walk bufctl for cache %p", addr);
1930 return (WALK_DONE);
1931 }
1932
1933 return (WALK_NEXT);
1934 }
1935
1936 static int
1937 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1938 {
1939 if (lhs->abb_ts < rhs->abb_ts)
1940 return (1);
1941 if (lhs->abb_ts > rhs->abb_ts)
1942 return (-1);
1943 return (0);
1944 }
1945
1946 static int
1947 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1948 {
1949 allocdby_walk_t *abw;
1950
1951 if (wsp->walk_addr == NULL) {
1952 mdb_warn("allocdby walk doesn't support global walks\n");
1953 return (WALK_ERR);
1954 }
1955
1956 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1957
1958 abw->abw_thread = wsp->walk_addr;
1959 abw->abw_walk = walk;
1960 abw->abw_size = 128; /* something reasonable */
1961 abw->abw_buf =
1962 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1963
1964 wsp->walk_data = abw;
1965
1966 if (mdb_walk("kmem_cache",
1967 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1968 mdb_warn("couldn't walk kmem_cache");
1969 allocdby_walk_fini(wsp);
1970 return (WALK_ERR);
1971 }
1972
1973 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1974 (int(*)(const void *, const void *))allocdby_cmp);
1975
1976 return (WALK_NEXT);
1977 }
1978
1979 int
1980 allocdby_walk_init(mdb_walk_state_t *wsp)
1981 {
1982 return (allocdby_walk_init_common(wsp, "bufctl"));
1983 }
1984
1985 int
1986 freedby_walk_init(mdb_walk_state_t *wsp)
1987 {
1988 return (allocdby_walk_init_common(wsp, "freectl"));
1989 }
1990
1991 int
1992 allocdby_walk_step(mdb_walk_state_t *wsp)
1993 {
1994 allocdby_walk_t *abw = wsp->walk_data;
1995 kmem_bufctl_audit_t bc;
1996 uintptr_t addr;
1997
1998 if (abw->abw_ndx == abw->abw_nbufs)
1999 return (WALK_DONE);
2000
2001 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
2002
2003 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2004 mdb_warn("couldn't read bufctl at %p", addr);
2005 return (WALK_DONE);
2006 }
2007
2008 return (wsp->walk_callback(addr, &bc, wsp->walk_cbdata));
2009 }
2010
2011 void
2012 allocdby_walk_fini(mdb_walk_state_t *wsp)
2013 {
2014 allocdby_walk_t *abw = wsp->walk_data;
2015
2016 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
2017 mdb_free(abw, sizeof (allocdby_walk_t));
2018 }
2019
2020 /*ARGSUSED*/
2021 int
2022 allocdby_walk(uintptr_t addr, const kmem_bufctl_audit_t *bcp, void *ignored)
2023 {
2024 char c[MDB_SYM_NAMLEN];
2025 GElf_Sym sym;
2026 int i;
2027
2028 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
2029 for (i = 0; i < bcp->bc_depth; i++) {
2030 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2031 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2032 continue;
2033 if (strncmp(c, "kmem_", 5) == 0)
2034 continue;
2035 mdb_printf("%s+0x%lx",
2036 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
2037 break;
2038 }
2039 mdb_printf("\n");
2040
2041 return (WALK_NEXT);
2042 }
2043
2044 static int
2045 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
2046 {
2047 if (!(flags & DCMD_ADDRSPEC))
2048 return (DCMD_USAGE);
2049
2050 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
2051
2052 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
2053 mdb_warn("can't walk '%s' for %p", w, addr);
2054 return (DCMD_ERR);
2055 }
2056
2057 return (DCMD_OK);
2058 }
2059
2060 /*ARGSUSED*/
2061 int
2062 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2063 {
2064 return (allocdby_common(addr, flags, "allocdby"));
2065 }
2066
2067 /*ARGSUSED*/
2068 int
2069 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2070 {
2071 return (allocdby_common(addr, flags, "freedby"));
2072 }
2073
2074 /*
2075 * Return a string describing the address in relation to the given thread's
2076 * stack.
2077 *
2078 * - If the thread state is TS_FREE, return " (inactive interrupt thread)".
2079 *
2080 * - If the address is above the stack pointer, return an empty string
2081 * signifying that the address is active.
2082 *
2083 * - If the address is below the stack pointer, and the thread is not on proc,
2084 * return " (below sp)".
2085 *
2086 * - If the address is below the stack pointer, and the thread is on proc,
2087 * return " (possibly below sp)". Depending on context, we may or may not
2088 * have an accurate t_sp.
2089 */
2090 static const char *
2091 stack_active(const kthread_t *t, uintptr_t addr)
2092 {
2093 uintptr_t panicstk;
2094 GElf_Sym sym;
2095
2096 if (t->t_state == TS_FREE)
2097 return (" (inactive interrupt thread)");
2098
2099 /*
2100 * Check to see if we're on the panic stack. If so, ignore t_sp, as it
2101 * no longer relates to the thread's real stack.
2102 */
2103 if (mdb_lookup_by_name("panic_stack", &sym) == 0) {
2104 panicstk = (uintptr_t)sym.st_value;
2105
2106 if (t->t_sp >= panicstk && t->t_sp < panicstk + PANICSTKSIZE)
2107 return ("");
2108 }
2109
2110 if (addr >= t->t_sp + STACK_BIAS)
2111 return ("");
2112
2113 if (t->t_state == TS_ONPROC)
2114 return (" (possibly below sp)");
2115
2116 return (" (below sp)");
2117 }
2118
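/*
 * The ::whatis support in the remainder of this file identifies which kmem
 * buffer, vmem segment, thread stack, module segment, or page_t contains a
 * given address.  A usage sketch (the address is a placeholder; -v is the
 * usual ::whatis verbose flag):
 *
 *	> <addr>::whatis
 *	> <addr>::whatis -v
 */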
2119 /*
2120 * Additional state for the kmem and vmem ::whatis handlers
2121 */
2122 typedef struct whatis_info {
2123 mdb_whatis_t *wi_w;
2124 const kmem_cache_t *wi_cache;
2125 const vmem_t *wi_vmem;
2126 vmem_t *wi_msb_arena;
2127 size_t wi_slab_size;
2128 uint_t wi_slab_found;
2129 uint_t wi_kmem_lite_count;
2130 uint_t wi_freemem;
2131 } whatis_info_t;
2132
2133 /* call one of our dcmd functions with "-v" and the provided address */
2134 static void
2135 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2136 {
2137 mdb_arg_t a;
2138 a.a_type = MDB_TYPE_STRING;
2139 a.a_un.a_str = "-v";
2140
2141 mdb_printf(":\n");
2142 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2143 }
2144
2145 static void
2146 whatis_print_kmf_lite(uintptr_t btaddr, size_t count)
2147 {
2148 #define KMEM_LITE_MAX 16
2149 pc_t callers[KMEM_LITE_MAX];
2150 pc_t uninit = (pc_t)KMEM_UNINITIALIZED_PATTERN;
2151
2152 kmem_buftag_t bt;
2153 intptr_t stat;
2154 const char *plural = "";
2155 int i;
2156
2157 /* validate our arguments and read in the buftag */
2158 if (count == 0 || count > KMEM_LITE_MAX ||
2159 mdb_vread(&bt, sizeof (bt), btaddr) == -1)
2160 return;
2161
2162 /* validate the buffer state and read in the callers */
2163 stat = (intptr_t)bt.bt_bufctl ^ bt.bt_bxstat;
2164
2165 if (stat != KMEM_BUFTAG_ALLOC && stat != KMEM_BUFTAG_FREE)
2166 return;
2167
2168 if (mdb_vread(callers, count * sizeof (pc_t),
2169 btaddr + offsetof(kmem_buftag_lite_t, bt_history)) == -1)
2170 return;
2171
2172 /* If there aren't any filled in callers, bail */
2173 if (callers[0] == uninit)
2174 return;
2175
2176 plural = (callers[1] == uninit) ? "" : "s";
2177
2178 /* Everything's done and checked; print them out */
2179 mdb_printf(":\n");
2180
2181 mdb_inc_indent(8);
2182 mdb_printf("recent caller%s: %a", plural, callers[0]);
2183 for (i = 1; i < count; i++) {
2184 if (callers[i] == uninit)
2185 break;
2186 mdb_printf(", %a", callers[i]);
2187 }
2188 mdb_dec_indent(8);
2189 }
2190
2191 static void
2192 whatis_print_kmem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2193 uintptr_t baddr)
2194 {
2195 mdb_whatis_t *w = wi->wi_w;
2196
2197 const kmem_cache_t *cp = wi->wi_cache;
2198 /* LINTED pointer cast may result in improper alignment */
2199 uintptr_t btaddr = (uintptr_t)KMEM_BUFTAG(cp, addr);
2200 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2201 int call_printer = (!quiet && (cp->cache_flags & KMF_AUDIT));
2202
2203 mdb_whatis_report_object(w, maddr, addr, "");
2204
2205 if (baddr != 0 && !call_printer)
2206 mdb_printf("bufctl %p ", baddr);
2207
2208 mdb_printf("%s from %s",
2209 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2210
2211 if (baddr != 0 && call_printer) {
2212 whatis_call_printer(bufctl, baddr);
2213 return;
2214 }
2215
2216 /* for KMF_LITE caches, try to print out the previous callers */
2217 if (!quiet && (cp->cache_flags & KMF_LITE))
2218 whatis_print_kmf_lite(btaddr, wi->wi_kmem_lite_count);
2219
2220 mdb_printf("\n");
2221 }
2222
2223 /*ARGSUSED*/
2224 static int
2225 whatis_walk_kmem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2226 {
2227 mdb_whatis_t *w = wi->wi_w;
2228
2229 uintptr_t cur;
2230 size_t size = wi->wi_cache->cache_bufsize;
2231
2232 while (mdb_whatis_match(w, addr, size, &cur))
2233 whatis_print_kmem(wi, cur, addr, NULL);
2234
2235 return (WHATIS_WALKRET(w));
2236 }
2237
2238 /*ARGSUSED*/
2239 static int
2240 whatis_walk_bufctl(uintptr_t baddr, const kmem_bufctl_t *bcp, whatis_info_t *wi)
2241 {
2242 mdb_whatis_t *w = wi->wi_w;
2243
2244 uintptr_t cur;
2245 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2246 size_t size = wi->wi_cache->cache_bufsize;
2247
2248 while (mdb_whatis_match(w, addr, size, &cur))
2249 whatis_print_kmem(wi, cur, addr, baddr);
2250
2251 return (WHATIS_WALKRET(w));
2252 }
2253
2254 static int
2255 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2256 {
2257 mdb_whatis_t *w = wi->wi_w;
2258
2259 size_t size = vs->vs_end - vs->vs_start;
2260 uintptr_t cur;
2261
2262 /* We're not interested in anything but alloc and free segments */
2263 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2264 return (WALK_NEXT);
2265
2266 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2267 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2268
2269 /*
2270 		 * If we're not printing it separately, provide the vmem_seg
2271 * pointer if it has a stack trace.
2272 */
2273 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2274 (!(mdb_whatis_flags(w) & WHATIS_BUFCTL) ||
2275 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2276 mdb_printf("vmem_seg %p ", addr);
2277 }
2278
2279 mdb_printf("%s from the %s vmem arena",
2280 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2281 wi->wi_vmem->vm_name);
2282
2283 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2284 whatis_call_printer(vmem_seg, addr);
2285 else
2286 mdb_printf("\n");
2287 }
2288
2289 return (WHATIS_WALKRET(w));
2290 }
2291
2292 static int
2293 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2294 {
2295 mdb_whatis_t *w = wi->wi_w;
2296 const char *nm = vmem->vm_name;
2297
2298 int identifier = ((vmem->vm_cflags & VMC_IDENTIFIER) != 0);
2299 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2300
2301 if (identifier != idspace)
2302 return (WALK_NEXT);
2303
2304 wi->wi_vmem = vmem;
2305
2306 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2307 mdb_printf("Searching vmem arena %s...\n", nm);
2308
2309 if (mdb_pwalk("vmem_seg",
2310 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2311 mdb_warn("can't walk vmem_seg for %p", addr);
2312 return (WALK_NEXT);
2313 }
2314
2315 return (WHATIS_WALKRET(w));
2316 }
2317
2318 /*ARGSUSED*/
2319 static int
2320 whatis_walk_slab(uintptr_t saddr, const kmem_slab_t *sp, whatis_info_t *wi)
2321 {
2322 mdb_whatis_t *w = wi->wi_w;
2323
2324 /* It must overlap with the slab data, or it's not interesting */
2325 if (mdb_whatis_overlaps(w,
2326 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2327 wi->wi_slab_found++;
2328 return (WALK_DONE);
2329 }
2330 return (WALK_NEXT);
2331 }
2332
2333 static int
2334 whatis_walk_cache(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2335 {
2336 mdb_whatis_t *w = wi->wi_w;
2337
2338 char *walk, *freewalk;
2339 mdb_walk_cb_t func;
2340 int do_bufctl;
2341
2342 int identifier = ((c->cache_flags & KMC_IDENTIFIER) != 0);
2343 int idspace = ((mdb_whatis_flags(w) & WHATIS_IDSPACE) != 0);
2344
2345 if (identifier != idspace)
2346 return (WALK_NEXT);
2347
2348 /* Override the '-b' flag as necessary */
2349 if (!(c->cache_flags & KMF_HASH))
2350 do_bufctl = FALSE; /* no bufctls to walk */
2351 else if (c->cache_flags & KMF_AUDIT)
2352 do_bufctl = TRUE; /* we always want debugging info */
2353 else
2354 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2355
2356 if (do_bufctl) {
2357 walk = "bufctl";
2358 freewalk = "freectl";
2359 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2360 } else {
2361 walk = "kmem";
2362 freewalk = "freemem";
2363 func = (mdb_walk_cb_t)whatis_walk_kmem;
2364 }
2365
2366 wi->wi_cache = c;
2367
2368 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2369 mdb_printf("Searching %s...\n", c->cache_name);
2370
2371 /*
2372 	 * If more than two buffers live on each slab, figure out if we're
2373 * interested in anything in any slab before doing the more expensive
2374 * kmem/freemem (bufctl/freectl) walkers.
2375 */
2376 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2377 if (!(c->cache_flags & KMF_HASH))
2378 wi->wi_slab_size -= sizeof (kmem_slab_t);
2379
2380 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2381 wi->wi_slab_found = 0;
2382 if (mdb_pwalk("kmem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2383 addr) == -1) {
2384 mdb_warn("can't find kmem_slab walker");
2385 return (WALK_DONE);
2386 }
2387 if (wi->wi_slab_found == 0)
2388 return (WALK_NEXT);
2389 }
2390
2391 wi->wi_freemem = FALSE;
2392 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2393 mdb_warn("can't find %s walker", walk);
2394 return (WALK_DONE);
2395 }
2396
2397 if (mdb_whatis_done(w))
2398 return (WALK_DONE);
2399
2400 /*
2401 * We have searched for allocated memory; now search for freed memory.
2402 */
2403 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2404 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2405
2406 wi->wi_freemem = TRUE;
2407 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2408 mdb_warn("can't find %s walker", freewalk);
2409 return (WALK_DONE);
2410 }
2411
2412 return (WHATIS_WALKRET(w));
2413 }
2414
2415 static int
2416 whatis_walk_touch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2417 {
2418 if (c->cache_arena == wi->wi_msb_arena ||
2419 (c->cache_cflags & KMC_NOTOUCH))
2420 return (WALK_NEXT);
2421
2422 return (whatis_walk_cache(addr, c, wi));
2423 }
2424
2425 static int
2426 whatis_walk_metadata(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2427 {
2428 if (c->cache_arena != wi->wi_msb_arena)
2429 return (WALK_NEXT);
2430
2431 return (whatis_walk_cache(addr, c, wi));
2432 }
2433
2434 static int
2435 whatis_walk_notouch(uintptr_t addr, const kmem_cache_t *c, whatis_info_t *wi)
2436 {
2437 if (c->cache_arena == wi->wi_msb_arena ||
2438 !(c->cache_cflags & KMC_NOTOUCH))
2439 return (WALK_NEXT);
2440
2441 return (whatis_walk_cache(addr, c, wi));
2442 }
2443
2444 static int
2445 whatis_walk_thread(uintptr_t addr, const kthread_t *t, mdb_whatis_t *w)
2446 {
2447 uintptr_t cur;
2448 uintptr_t saddr;
2449 size_t size;
2450
2451 /*
2452 * Often, one calls ::whatis on an address from a thread structure.
2453 * We use this opportunity to short circuit this case...
2454 */
2455 while (mdb_whatis_match(w, addr, sizeof (kthread_t), &cur))
2456 mdb_whatis_report_object(w, cur, addr,
2457 "allocated as a thread structure\n");
2458
2459 /*
2460 * Now check the stack
2461 */
2462 if (t->t_stkbase == NULL)
2463 return (WALK_NEXT);
2464
2465 /*
2466 * This assumes that t_stk is the end of the stack, but it's really
2467 * only the initial stack pointer for the thread. Arguments to the
2468 * initial procedure, SA(MINFRAME), etc. are all after t_stk. So
2469 * that 't->t_stk::whatis' reports "part of t's stack", we include
2470 * t_stk in the range (the "+ 1", below), but the kernel should
2471 * really include the full stack bounds where we can find it.
2472 */
2473 saddr = (uintptr_t)t->t_stkbase;
2474 size = (uintptr_t)t->t_stk - saddr + 1;
2475 while (mdb_whatis_match(w, saddr, size, &cur))
2476 mdb_whatis_report_object(w, cur, cur,
2477 "in thread %p's stack%s\n", addr, stack_active(t, cur));
2478
2479 return (WHATIS_WALKRET(w));
2480 }
2481
2482 static void
2483 whatis_modctl_match(mdb_whatis_t *w, const char *name,
2484 uintptr_t base, size_t size, const char *where)
2485 {
2486 uintptr_t cur;
2487
2488 /*
2489 * Since we're searching for addresses inside a module, we report
2490 * them as symbols.
2491 */
2492 while (mdb_whatis_match(w, base, size, &cur))
2493 mdb_whatis_report_address(w, cur, "in %s's %s\n", name, where);
2494 }
2495
2496 static int
2497 whatis_walk_modctl(uintptr_t addr, const struct modctl *m, mdb_whatis_t *w)
2498 {
2499 char name[MODMAXNAMELEN];
2500 struct module mod;
2501 Shdr shdr;
2502
2503 if (m->mod_mp == NULL)
2504 return (WALK_NEXT);
2505
2506 if (mdb_vread(&mod, sizeof (mod), (uintptr_t)m->mod_mp) == -1) {
2507 mdb_warn("couldn't read modctl %p's module", addr);
2508 return (WALK_NEXT);
2509 }
2510
2511 if (mdb_readstr(name, sizeof (name), (uintptr_t)m->mod_modname) == -1)
2512 (void) mdb_snprintf(name, sizeof (name), "0x%p", addr);
2513
2514 whatis_modctl_match(w, name,
2515 (uintptr_t)mod.text, mod.text_size, "text segment");
2516 whatis_modctl_match(w, name,
2517 (uintptr_t)mod.data, mod.data_size, "data segment");
2518 whatis_modctl_match(w, name,
2519 (uintptr_t)mod.bss, mod.bss_size, "bss segment");
2520
2521 if (mdb_vread(&shdr, sizeof (shdr), (uintptr_t)mod.symhdr) == -1) {
2522 mdb_warn("couldn't read symbol header for %p's module", addr);
2523 return (WALK_NEXT);
2524 }
2525
2526 whatis_modctl_match(w, name,
2527 (uintptr_t)mod.symtbl, mod.nsyms * shdr.sh_entsize, "symtab");
2528 whatis_modctl_match(w, name,
2529 (uintptr_t)mod.symspace, mod.symsize, "symtab");
2530
2531 return (WHATIS_WALKRET(w));
2532 }
2533
2534 /*ARGSUSED*/
2535 static int
2536 whatis_walk_memseg(uintptr_t addr, const struct memseg *seg, mdb_whatis_t *w)
2537 {
2538 uintptr_t cur;
2539
2540 uintptr_t base = (uintptr_t)seg->pages;
2541 size_t size = (uintptr_t)seg->epages - base;
2542
2543 while (mdb_whatis_match(w, base, size, &cur)) {
2544 /* round our found pointer down to the page_t base. */
2545 size_t offset = (cur - base) % sizeof (page_t);
2546
2547 mdb_whatis_report_object(w, cur, cur - offset,
2548 "allocated as a page structure\n");
2549 }
2550
2551 return (WHATIS_WALKRET(w));
2552 }
2553
2554 /*ARGSUSED*/
2555 static int
2556 whatis_run_modules(mdb_whatis_t *w, void *arg)
2557 {
2558 if (mdb_walk("modctl", (mdb_walk_cb_t)whatis_walk_modctl, w) == -1) {
2559 mdb_warn("couldn't find modctl walker");
2560 return (1);
2561 }
2562 return (0);
2563 }
2564
2565 /*ARGSUSED*/
2566 static int
2567 whatis_run_threads(mdb_whatis_t *w, void *ignored)
2568 {
2569 /*
2570 * Now search all thread stacks. Yes, this is a little weak; we
2571 * can save a lot of work by first checking to see if the
2572 * address is in segkp vs. segkmem. But hey, computers are
2573 * fast.
2574 */
2575 if (mdb_walk("thread", (mdb_walk_cb_t)whatis_walk_thread, w) == -1) {
2576 mdb_warn("couldn't find thread walker");
2577 return (1);
2578 }
2579 return (0);
2580 }
2581
2582 /*ARGSUSED*/
2583 static int
2584 whatis_run_pages(mdb_whatis_t *w, void *ignored)
2585 {
2586 if (mdb_walk("memseg", (mdb_walk_cb_t)whatis_walk_memseg, w) == -1) {
2587 mdb_warn("couldn't find memseg walker");
2588 return (1);
2589 }
2590 return (0);
2591 }
2592
2593 /*ARGSUSED*/
2594 static int
2595 whatis_run_kmem(mdb_whatis_t *w, void *ignored)
2596 {
2597 whatis_info_t wi;
2598
2599 bzero(&wi, sizeof (wi));
2600 wi.wi_w = w;
2601
2602 if (mdb_readvar(&wi.wi_msb_arena, "kmem_msb_arena") == -1)
2603 mdb_warn("unable to readvar \"kmem_msb_arena\"");
2604
2605 if (mdb_readvar(&wi.wi_kmem_lite_count,
2606 "kmem_lite_count") == -1 || wi.wi_kmem_lite_count > 16)
2607 wi.wi_kmem_lite_count = 0;
2608
2609 /*
2610 * We process kmem caches in the following order:
2611 *
2612 * non-KMC_NOTOUCH, non-metadata (typically the most interesting)
2613 * metadata (can be huge with KMF_AUDIT)
2614 * KMC_NOTOUCH, non-metadata (see kmem_walk_all())
2615 */
2616 if (mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2617 &wi) == -1 ||
2618 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2619 &wi) == -1 ||
2620 mdb_walk("kmem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2621 &wi) == -1) {
2622 mdb_warn("couldn't find kmem_cache walker");
2623 return (1);
2624 }
2625 return (0);
2626 }
2627
2628 /*ARGSUSED*/
2629 static int
2630 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2631 {
2632 whatis_info_t wi;
2633
2634 bzero(&wi, sizeof (wi));
2635 wi.wi_w = w;
2636
2637 if (mdb_walk("vmem_postfix",
2638 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2639 mdb_warn("couldn't find vmem_postfix walker");
2640 return (1);
2641 }
2642 return (0);
2643 }
2644
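/*
 * ::kmem_log displays the kmem transaction log (present only when kernel
 * memory auditing is enabled).  The CPU column is derived by checking which
 * per-CPU log chunk contains each entry.  Usage sketch (the address is a
 * placeholder):
 *
 *	> ::kmem_log			display the whole transaction log
 *	> <bufctl addr>::kmem_log	display the entry for one bufctl
 */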
2645 typedef struct kmem_log_cpu {
2646 uintptr_t kmc_low;
2647 uintptr_t kmc_high;
2648 } kmem_log_cpu_t;
2649
2650 typedef struct kmem_log_data {
2651 uintptr_t kmd_addr;
2652 kmem_log_cpu_t *kmd_cpu;
2653 } kmem_log_data_t;
2654
2655 int
2656 kmem_log_walk(uintptr_t addr, const kmem_bufctl_audit_t *b,
2657 kmem_log_data_t *kmd)
2658 {
2659 int i;
2660 kmem_log_cpu_t *kmc = kmd->kmd_cpu;
2661 size_t bufsize;
2662
2663 for (i = 0; i < NCPU; i++) {
2664 if (addr >= kmc[i].kmc_low && addr < kmc[i].kmc_high)
2665 break;
2666 }
2667
2668 if (kmd->kmd_addr) {
2669 if (b->bc_cache == NULL)
2670 return (WALK_NEXT);
2671
2672 if (mdb_vread(&bufsize, sizeof (bufsize),
2673 (uintptr_t)&b->bc_cache->cache_bufsize) == -1) {
2674 mdb_warn(
2675 "failed to read cache_bufsize for cache at %p",
2676 b->bc_cache);
2677 return (WALK_ERR);
2678 }
2679
2680 if (kmd->kmd_addr < (uintptr_t)b->bc_addr ||
2681 kmd->kmd_addr >= (uintptr_t)b->bc_addr + bufsize)
2682 return (WALK_NEXT);
2683 }
2684
2685 if (i == NCPU)
2686 mdb_printf(" ");
2687 else
2688 mdb_printf("%3d", i);
2689
2690 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2691 b->bc_timestamp, b->bc_thread);
2692
2693 return (WALK_NEXT);
2694 }
2695
2696 /*ARGSUSED*/
2697 int
2698 kmem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2699 {
2700 kmem_log_header_t lh;
2701 kmem_cpu_log_header_t clh;
2702 uintptr_t lhp, clhp;
2703 int ncpus;
2704 uintptr_t *cpu;
2705 GElf_Sym sym;
2706 kmem_log_cpu_t *kmc;
2707 int i;
2708 kmem_log_data_t kmd;
2709 uint_t opt_b = FALSE;
2710
2711 if (mdb_getopts(argc, argv,
2712 'b', MDB_OPT_SETBITS, TRUE, &opt_b, NULL) != argc)
2713 return (DCMD_USAGE);
2714
2715 if (mdb_readvar(&lhp, "kmem_transaction_log") == -1) {
2716 mdb_warn("failed to read 'kmem_transaction_log'");
2717 return (DCMD_ERR);
2718 }
2719
2720 if (lhp == NULL) {
2721 mdb_warn("no kmem transaction log\n");
2722 return (DCMD_ERR);
2723 }
2724
2725 mdb_readvar(&ncpus, "ncpus");
2726
2727 if (mdb_vread(&lh, sizeof (kmem_log_header_t), lhp) == -1) {
2728 mdb_warn("failed to read log header at %p", lhp);
2729 return (DCMD_ERR);
2730 }
2731
2732 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2733
2734 cpu = mdb_alloc(sizeof (uintptr_t) * NCPU, UM_SLEEP | UM_GC);
2735
2736 if (mdb_lookup_by_name("cpu", &sym) == -1) {
2737 mdb_warn("couldn't find 'cpu' array");
2738 return (DCMD_ERR);
2739 }
2740
2741 if (sym.st_size != NCPU * sizeof (uintptr_t)) {
2742 mdb_warn("expected 'cpu' to be of size %d; found %d\n",
2743 NCPU * sizeof (uintptr_t), sym.st_size);
2744 return (DCMD_ERR);
2745 }
2746
2747 if (mdb_vread(cpu, sym.st_size, (uintptr_t)sym.st_value) == -1) {
2748 mdb_warn("failed to read cpu array at %p", sym.st_value);
2749 return (DCMD_ERR);
2750 }
2751
2752 kmc = mdb_zalloc(sizeof (kmem_log_cpu_t) * NCPU, UM_SLEEP | UM_GC);
2753 kmd.kmd_addr = NULL;
2754 kmd.kmd_cpu = kmc;
2755
2756 for (i = 0; i < NCPU; i++) {
2757
2758 if (cpu[i] == NULL)
2759 continue;
2760
2761 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2762 mdb_warn("cannot read cpu %d's log header at %p",
2763 i, clhp);
2764 return (DCMD_ERR);
2765 }
2766
2767 kmc[i].kmc_low = clh.clh_chunk * lh.lh_chunksize +
2768 (uintptr_t)lh.lh_base;
2769 kmc[i].kmc_high = (uintptr_t)clh.clh_current;
2770
2771 clhp += sizeof (kmem_cpu_log_header_t);
2772 }
2773
2774 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR", "BUFADDR",
2775 "TIMESTAMP", "THREAD");
2776
2777 /*
2778 * If we have been passed an address, print out only log entries
2779 * corresponding to that address. If opt_b is specified, then interpret
2780 * the address as a bufctl.
2781 */
2782 if (flags & DCMD_ADDRSPEC) {
2783 kmem_bufctl_audit_t b;
2784
2785 if (opt_b) {
2786 kmd.kmd_addr = addr;
2787 } else {
2788 if (mdb_vread(&b,
2789 sizeof (kmem_bufctl_audit_t), addr) == -1) {
2790 mdb_warn("failed to read bufctl at %p", addr);
2791 return (DCMD_ERR);
2792 }
2793
2794 (void) kmem_log_walk(addr, &b, &kmd);
2795
2796 return (DCMD_OK);
2797 }
2798 }
2799
2800 if (mdb_walk("kmem_log", (mdb_walk_cb_t)kmem_log_walk, &kmd) == -1) {
2801 mdb_warn("can't find kmem log walker");
2802 return (DCMD_ERR);
2803 }
2804
2805 return (DCMD_OK);
2806 }
2807
2808 typedef struct bufctl_history_cb {
2809 int bhc_flags;
2810 int bhc_argc;
2811 const mdb_arg_t *bhc_argv;
2812 int bhc_ret;
2813 } bufctl_history_cb_t;
2814
2815 /*ARGSUSED*/
2816 static int
2817 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2818 {
2819 bufctl_history_cb_t *bhc = arg;
2820
2821 bhc->bhc_ret =
2822 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2823
2824 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2825
2826 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2827 }
2828
2829 void
2830 bufctl_help(void)
2831 {
2832 mdb_printf("%s",
2833 "Display the contents of kmem_bufctl_audit_ts, with optional filtering.\n\n");
2834 mdb_dec_indent(2);
2835 mdb_printf("%<b>OPTIONS%</b>\n");
2836 mdb_inc_indent(2);
2837 mdb_printf("%s",
2838 " -v Display the full content of the bufctl, including its stack trace\n"
2839 " -h retrieve the bufctl's transaction history, if available\n"
2840 " -a addr\n"
2841 " filter out bufctls not involving the buffer at addr\n"
2842 " -c caller\n"
2843 " filter out bufctls without the function/PC in their stack trace\n"
2844 " -e earliest\n"
2845 " filter out bufctls timestamped before earliest\n"
2846 " -l latest\n"
2847 " filter out bufctls timestamped after latest\n"
2848 " -t thread\n"
2849 " filter out bufctls not involving thread\n");
2850 }
2851
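/*
 * Example ::bufctl pipelines (illustrative; the addresses are placeholders
 * and assume a cache created with KMF_AUDIT):
 *
 *	> <bufctl addr>::bufctl -v
 *	> <cache addr>::walk bufctl | ::bufctl -t <thread addr>
 */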
2852 int
2853 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2854 {
2855 kmem_bufctl_audit_t bc;
2856 uint_t verbose = FALSE;
2857 uint_t history = FALSE;
2858 uint_t in_history = FALSE;
2859 uintptr_t caller = NULL, thread = NULL;
2860 uintptr_t laddr, haddr, baddr = NULL;
2861 hrtime_t earliest = 0, latest = 0;
2862 int i, depth;
2863 char c[MDB_SYM_NAMLEN];
2864 GElf_Sym sym;
2865
2866 if (mdb_getopts(argc, argv,
2867 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2868 'h', MDB_OPT_SETBITS, TRUE, &history,
2869 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2870 'c', MDB_OPT_UINTPTR, &caller,
2871 't', MDB_OPT_UINTPTR, &thread,
2872 'e', MDB_OPT_UINT64, &earliest,
2873 'l', MDB_OPT_UINT64, &latest,
2874 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2875 return (DCMD_USAGE);
2876
2877 if (!(flags & DCMD_ADDRSPEC))
2878 return (DCMD_USAGE);
2879
2880 if (in_history && !history)
2881 return (DCMD_USAGE);
2882
2883 if (history && !in_history) {
2884 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2885 UM_SLEEP | UM_GC);
2886 bufctl_history_cb_t bhc;
2887
2888 nargv[0].a_type = MDB_TYPE_STRING;
2889 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2890
2891 for (i = 0; i < argc; i++)
2892 nargv[i + 1] = argv[i];
2893
2894 /*
2895 * When in history mode, we treat each element as if it
2896 		 * were in a separate loop, so that the headers group
2897 * bufctls with similar histories.
2898 */
2899 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2900 bhc.bhc_argc = argc + 1;
2901 bhc.bhc_argv = nargv;
2902 bhc.bhc_ret = DCMD_OK;
2903
2904 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2905 addr) == -1) {
2906 mdb_warn("unable to walk bufctl_history");
2907 return (DCMD_ERR);
2908 }
2909
2910 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2911 mdb_printf("\n");
2912
2913 return (bhc.bhc_ret);
2914 }
2915
2916 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2917 if (verbose) {
2918 mdb_printf("%16s %16s %16s %16s\n"
2919 "%<u>%16s %16s %16s %16s%</u>\n",
2920 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2921 "", "CACHE", "LASTLOG", "CONTENTS");
2922 } else {
2923 mdb_printf("%<u>%-?s %-?s %-12s %-?s %s%</u>\n",
2924 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD", "CALLER");
2925 }
2926 }
2927
2928 if (mdb_vread(&bc, sizeof (bc), addr) == -1) {
2929 mdb_warn("couldn't read bufctl at %p", addr);
2930 return (DCMD_ERR);
2931 }
2932
2933 /*
2934 * Guard against bogus bc_depth in case the bufctl is corrupt or
2935 * the address does not really refer to a bufctl.
2936 */
2937 depth = MIN(bc.bc_depth, KMEM_STACK_DEPTH);
2938
2939 if (caller != NULL) {
2940 laddr = caller;
2941 haddr = caller + sizeof (caller);
2942
2943 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2944 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2945 /*
2946 * We were provided an exact symbol value; any
2947 * address in the function is valid.
2948 */
2949 laddr = (uintptr_t)sym.st_value;
2950 haddr = (uintptr_t)sym.st_value + sym.st_size;
2951 }
2952
2953 for (i = 0; i < depth; i++)
2954 if (bc.bc_stack[i] >= laddr && bc.bc_stack[i] < haddr)
2955 break;
2956
2957 if (i == depth)
2958 return (DCMD_OK);
2959 }
2960
2961 if (thread != NULL && (uintptr_t)bc.bc_thread != thread)
2962 return (DCMD_OK);
2963
2964 if (earliest != 0 && bc.bc_timestamp < earliest)
2965 return (DCMD_OK);
2966
2967 if (latest != 0 && bc.bc_timestamp > latest)
2968 return (DCMD_OK);
2969
2970 if (baddr != 0 && (uintptr_t)bc.bc_addr != baddr)
2971 return (DCMD_OK);
2972
2973 if (flags & DCMD_PIPE_OUT) {
2974 mdb_printf("%#lr\n", addr);
2975 return (DCMD_OK);
2976 }
2977
2978 if (verbose) {
2979 mdb_printf(
2980 "%<b>%16p%</b> %16p %16llx %16p\n"
2981 "%16s %16p %16p %16p\n",
2982 addr, bc.bc_addr, bc.bc_timestamp, bc.bc_thread,
2983 "", bc.bc_cache, bc.bc_lastlog, bc.bc_contents);
2984
2985 mdb_inc_indent(17);
2986 for (i = 0; i < depth; i++)
2987 mdb_printf("%a\n", bc.bc_stack[i]);
2988 mdb_dec_indent(17);
2989 mdb_printf("\n");
2990 } else {
2991 mdb_printf("%0?p %0?p %12llx %0?p", addr, bc.bc_addr,
2992 bc.bc_timestamp, bc.bc_thread);
2993
2994 for (i = 0; i < depth; i++) {
2995 if (mdb_lookup_by_addr(bc.bc_stack[i],
2996 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2997 continue;
2998 if (strncmp(c, "kmem_", 5) == 0)
2999 continue;
3000 mdb_printf(" %a\n", bc.bc_stack[i]);
3001 break;
3002 }
3003
3004 if (i >= depth)
3005 mdb_printf("\n");
3006 }
3007
3008 return (DCMD_OK);
3009 }
3010
3011 typedef struct kmem_verify {
3012 uint64_t *kmv_buf; /* buffer to read cache contents into */
3013 size_t kmv_size; /* number of bytes in kmv_buf */
3014 int kmv_corruption; /* > 0 if corruption found. */
3015 uint_t kmv_flags; /* dcmd flags */
3016 struct kmem_cache kmv_cache; /* the cache we're operating on */
3017 } kmem_verify_t;
3018
3019 /*
3020 * verify_pattern()
3021 * verify that buf is filled with the pattern pat.
3022 */
3023 static int64_t
3024 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
3025 {
3026 /*LINTED*/
3027 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
3028 uint64_t *buf;
3029
3030 for (buf = buf_arg; buf < bufend; buf++)
3031 if (*buf != pat)
3032 return ((uintptr_t)buf - (uintptr_t)buf_arg);
3033 return (-1);
3034 }
3035
3036 /*
3037 * verify_buftag()
3038 * verify that btp->bt_bxstat == (bcp ^ pat)
3039 */
3040 static int
3041 verify_buftag(kmem_buftag_t *btp, uintptr_t pat)
3042 {
3043 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
3044 }
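/*
 * For example, for a free buffer the invariant checked above is
 *
 *	bt_bxstat == (uintptr_t)bt_bufctl ^ KMEM_BUFTAG_FREE
 *
 * so a stomped or stale bt_bufctl pointer is reported as a corrupt buftag.
 */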
3045
3046 /*
3047 * verify_free()
3048 * verify the integrity of a free block of memory by checking
3049 * that it is filled with 0xdeadbeef and that its buftag is sane.
3050 */
3051 /*ARGSUSED1*/
3052 static int
3053 verify_free(uintptr_t addr, const void *data, void *private)
3054 {
3055 kmem_verify_t *kmv = (kmem_verify_t *)private;
3056 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3057 int64_t corrupt; /* corruption offset */
3058 kmem_buftag_t *buftagp; /* ptr to buftag */
3059 kmem_cache_t *cp = &kmv->kmv_cache;
3060 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3061
3062 /*LINTED*/
3063 buftagp = KMEM_BUFTAG(cp, buf);
3064
3065 /*
3066 * Read the buffer to check.
3067 */
3068 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3069 if (!besilent)
3070 mdb_warn("couldn't read %p", addr);
3071 return (WALK_NEXT);
3072 }
3073
3074 if ((corrupt = verify_pattern(buf, cp->cache_verify,
3075 KMEM_FREE_PATTERN)) >= 0) {
3076 if (!besilent)
3077 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
3078 addr, (uintptr_t)addr + corrupt);
3079 goto corrupt;
3080 }
3081 /*
3082 * When KMF_LITE is set, buftagp->bt_redzone is used to hold
3083 * the first bytes of the buffer, hence we cannot check for red
3084 * zone corruption.
3085 */
3086 if ((cp->cache_flags & (KMF_HASH | KMF_LITE)) == KMF_HASH &&
3087 buftagp->bt_redzone != KMEM_REDZONE_PATTERN) {
3088 if (!besilent)
3089 mdb_printf("buffer %p (free) seems to "
3090 "have a corrupt redzone pattern\n", addr);
3091 goto corrupt;
3092 }
3093
3094 /*
3095 * confirm bufctl pointer integrity.
3096 */
3097 if (verify_buftag(buftagp, KMEM_BUFTAG_FREE) == -1) {
3098 if (!besilent)
3099 mdb_printf("buffer %p (free) has a corrupt "
3100 "buftag\n", addr);
3101 goto corrupt;
3102 }
3103
3104 return (WALK_NEXT);
3105 corrupt:
3106 if (kmv->kmv_flags & DCMD_PIPE_OUT)
3107 mdb_printf("%p\n", addr);
3108 kmv->kmv_corruption++;
3109 return (WALK_NEXT);
3110 }
3111
3112 /*
3113 * verify_alloc()
3114 * Verify that the buftag of an allocated buffer makes sense with respect
3115 * to the buffer.
3116 */
3117 /*ARGSUSED1*/
3118 static int
3119 verify_alloc(uintptr_t addr, const void *data, void *private)
3120 {
3121 kmem_verify_t *kmv = (kmem_verify_t *)private;
3122 kmem_cache_t *cp = &kmv->kmv_cache;
3123 uint64_t *buf = kmv->kmv_buf; /* buf to validate */
3124 /*LINTED*/
3125 kmem_buftag_t *buftagp = KMEM_BUFTAG(cp, buf);
3126 uint32_t *ip = (uint32_t *)buftagp;
3127 uint8_t *bp = (uint8_t *)buf;
3128 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
3129 boolean_t besilent = !!(kmv->kmv_flags & (DCMD_LOOP | DCMD_PIPE_OUT));
3130
3131 /*
3132 * Read the buffer to check.
3133 */
3134 if (mdb_vread(buf, kmv->kmv_size, addr) == -1) {
3135 if (!besilent)
3136 mdb_warn("couldn't read %p", addr);
3137 return (WALK_NEXT);
3138 }
3139
3140 /*
3141 * There are two cases to handle:
3142 * 1. If the buf was alloc'd using kmem_cache_alloc, it will have
3143 * 0xfeedfacefeedface at the end of it
3144 * 2. If the buf was alloc'd using kmem_alloc, it will have
3145 * 0xbb just past the end of the region in use. At the buftag,
3146 * it will have 0xfeedface (or, if the whole buffer is in use,
3147 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
3148 * endianness), followed by 32 bits containing the offset of the
3149 * 0xbb byte in the buffer.
3150 *
3151 * Finally, the two 32-bit words that comprise the second half of the
3152 * buftag should xor to KMEM_BUFTAG_ALLOC
3153 */
3154
3155 if (buftagp->bt_redzone == KMEM_REDZONE_PATTERN)
3156 looks_ok = 1;
3157 else if (!KMEM_SIZE_VALID(ip[1]))
3158 size_ok = 0;
3159 else if (bp[KMEM_SIZE_DECODE(ip[1])] == KMEM_REDZONE_BYTE)
3160 looks_ok = 1;
3161 else
3162 size_ok = 0;
3163
3164 if (!size_ok) {
3165 if (!besilent)
3166 mdb_printf("buffer %p (allocated) has a corrupt "
3167 "redzone size encoding\n", addr);
3168 goto corrupt;
3169 }
3170
3171 if (!looks_ok) {
3172 if (!besilent)
3173 mdb_printf("buffer %p (allocated) has a corrupt "
3174 "redzone signature\n", addr);
3175 goto corrupt;
3176 }
3177
3178 if (verify_buftag(buftagp, KMEM_BUFTAG_ALLOC) == -1) {
3179 if (!besilent)
3180 mdb_printf("buffer %p (allocated) has a "
3181 "corrupt buftag\n", addr);
3182 goto corrupt;
3183 }
3184
3185 return (WALK_NEXT);
3186 corrupt:
3187 if (kmv->kmv_flags & DCMD_PIPE_OUT)
3188 mdb_printf("%p\n", addr);
3189
3190 kmv->kmv_corruption++;
3191 return (WALK_NEXT);
3192 }
3193
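/*
 * ::kmem_verify usage sketch (only caches with redzone checking enabled,
 * i.e. KMF_REDZONE and optionally KMF_DEADBEEF, can be verified):
 *
 *	> ::kmem_verify			check every cache, one line per cache
 *	> <cache addr>::kmem_verify	check one cache and describe corruption
 */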
3194 /*ARGSUSED2*/
3195 int
3196 kmem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3197 {
3198 if (flags & DCMD_ADDRSPEC) {
3199 int check_alloc = 0, check_free = 0;
3200 kmem_verify_t kmv;
3201
3202 if (mdb_vread(&kmv.kmv_cache, sizeof (kmv.kmv_cache),
3203 addr) == -1) {
3204 mdb_warn("couldn't read kmem_cache %p", addr);
3205 return (DCMD_ERR);
3206 }
3207
3208 if ((kmv.kmv_cache.cache_dump.kd_unsafe ||
3209 kmv.kmv_cache.cache_dump.kd_alloc_fails) &&
3210 !(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3211 mdb_warn("WARNING: cache was used during dump: "
3212 "corruption may be incorrectly reported\n");
3213 }
3214
3215 kmv.kmv_size = kmv.kmv_cache.cache_buftag +
3216 sizeof (kmem_buftag_t);
3217 kmv.kmv_buf = mdb_alloc(kmv.kmv_size, UM_SLEEP | UM_GC);
3218 kmv.kmv_corruption = 0;
3219 kmv.kmv_flags = flags;
3220
3221 if ((kmv.kmv_cache.cache_flags & KMF_REDZONE)) {
3222 check_alloc = 1;
3223 if (kmv.kmv_cache.cache_flags & KMF_DEADBEEF)
3224 check_free = 1;
3225 } else {
3226 if (!(flags & DCMD_LOOP)) {
3227 mdb_warn("cache %p (%s) does not have "
3228 "redzone checking enabled\n", addr,
3229 kmv.kmv_cache.cache_name);
3230 }
3231 return (DCMD_ERR);
3232 }
3233
3234 if (!(flags & (DCMD_LOOP | DCMD_PIPE_OUT))) {
3235 mdb_printf("Summary for cache '%s'\n",
3236 kmv.kmv_cache.cache_name);
3237 mdb_inc_indent(2);
3238 }
3239
3240 if (check_alloc)
3241 (void) mdb_pwalk("kmem", verify_alloc, &kmv, addr);
3242 if (check_free)
3243 (void) mdb_pwalk("freemem", verify_free, &kmv, addr);
3244
3245 if (!(flags & DCMD_PIPE_OUT)) {
3246 if (flags & DCMD_LOOP) {
3247 if (kmv.kmv_corruption == 0) {
3248 mdb_printf("%-*s %?p clean\n",
3249 KMEM_CACHE_NAMELEN,
3250 kmv.kmv_cache.cache_name, addr);
3251 } else {
3252 mdb_printf("%-*s %?p %d corrupt "
3253 "buffer%s\n", KMEM_CACHE_NAMELEN,
3254 kmv.kmv_cache.cache_name, addr,
3255 kmv.kmv_corruption,
3256 kmv.kmv_corruption > 1 ? "s" : "");
3257 }
3258 } else {
3259 /*
3260 * This is the more verbose mode, when the user
3261 * typed addr::kmem_verify. If the cache was
3262 * clean, nothing will have yet been printed. So
3263 * say something.
3264 */
3265 if (kmv.kmv_corruption == 0)
3266 mdb_printf("clean\n");
3267
3268 mdb_dec_indent(2);
3269 }
3270 }
3271 } else {
3272 /*
3273 * If the user didn't specify a cache to verify, we'll walk all
3274 * kmem_cache's, specifying ourself as a callback for each...
3275 * this is the equivalent of '::walk kmem_cache .::kmem_verify'
3276 */
3277
3278 if (!(flags & DCMD_PIPE_OUT)) {
3279 uintptr_t dump_curr;
3280 uintptr_t dump_end;
3281
3282 if (mdb_readvar(&dump_curr, "kmem_dump_curr") != -1 &&
3283 mdb_readvar(&dump_end, "kmem_dump_end") != -1 &&
3284 dump_curr == dump_end) {
3285 mdb_warn("WARNING: exceeded kmem_dump_size; "
3286 "corruption may be incorrectly reported\n");
3287 }
3288
3289 			mdb_printf("%<u>%-*s %-?s %-20s%</u>\n",
3290 KMEM_CACHE_NAMELEN, "Cache Name", "Addr",
3291 "Cache Integrity");
3292 }
3293
3294 (void) (mdb_walk_dcmd("kmem_cache", "kmem_verify", 0, NULL));
3295 }
3296
3297 return (DCMD_OK);
3298 }
3299
3300 typedef struct vmem_node {
3301 struct vmem_node *vn_next;
3302 struct vmem_node *vn_parent;
3303 struct vmem_node *vn_sibling;
3304 struct vmem_node *vn_children;
3305 uintptr_t vn_addr;
3306 int vn_marked;
3307 vmem_t vn_vmem;
3308 } vmem_node_t;
3309
3310 typedef struct vmem_walk {
3311 vmem_node_t *vw_root;
3312 vmem_node_t *vw_current;
3313 } vmem_walk_t;
3314
3315 int
3316 vmem_walk_init(mdb_walk_state_t *wsp)
3317 {
3318 uintptr_t vaddr, paddr;
3319 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
3320 vmem_walk_t *vw;
3321
3322 if (mdb_readvar(&vaddr, "vmem_list") == -1) {
3323 mdb_warn("couldn't read 'vmem_list'");
3324 return (WALK_ERR);
3325 }
3326
3327 while (vaddr != NULL) {
3328 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
3329 vp->vn_addr = vaddr;
3330 vp->vn_next = head;
3331 head = vp;
3332
3333 if (vaddr == wsp->walk_addr)
3334 current = vp;
3335
3336 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
3337 mdb_warn("couldn't read vmem_t at %p", vaddr);
3338 goto err;
3339 }
3340
3341 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
3342 }
3343
3344 for (vp = head; vp != NULL; vp = vp->vn_next) {
3345
3346 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
3347 vp->vn_sibling = root;
3348 root = vp;
3349 continue;
3350 }
3351
3352 for (parent = head; parent != NULL; parent = parent->vn_next) {
3353 if (parent->vn_addr != paddr)
3354 continue;
3355 vp->vn_sibling = parent->vn_children;
3356 parent->vn_children = vp;
3357 vp->vn_parent = parent;
3358 break;
3359 }
3360
3361 if (parent == NULL) {
3362 mdb_warn("couldn't find %p's parent (%p)\n",
3363 vp->vn_addr, paddr);
3364 goto err;
3365 }
3366 }
3367
3368 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3369 vw->vw_root = root;
3370
3371 if (current != NULL)
3372 vw->vw_current = current;
3373 else
3374 vw->vw_current = root;
3375
3376 wsp->walk_data = vw;
3377 return (WALK_NEXT);
3378 err:
3379 for (vp = head; head != NULL; vp = head) {
3380 head = vp->vn_next;
3381 mdb_free(vp, sizeof (vmem_node_t));
3382 }
3383
3384 return (WALK_ERR);
3385 }
3386
3387 int
3388 vmem_walk_step(mdb_walk_state_t *wsp)
3389 {
3390 vmem_walk_t *vw = wsp->walk_data;
3391 vmem_node_t *vp;
3392 int rval;
3393
3394 if ((vp = vw->vw_current) == NULL)
3395 return (WALK_DONE);
3396
3397 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3398
3399 if (vp->vn_children != NULL) {
3400 vw->vw_current = vp->vn_children;
3401 return (rval);
3402 }
3403
3404 do {
3405 vw->vw_current = vp->vn_sibling;
3406 vp = vp->vn_parent;
3407 } while (vw->vw_current == NULL && vp != NULL);
3408
3409 return (rval);
3410 }
3411
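/*
 * Note that the "vmem" walk above is pre-order: an arena is visited before
 * any of the arenas that import from it.  The "vmem_postfix" walk below is
 * the reverse, which lets consumers such as ::whatis attribute an address to
 * the most specific (leaf-most) arena before its source arena is searched.
 */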
3412 /*
3413 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3414 * children are visited before their parent. We perform the postfix walk
3415 * iteratively (rather than recursively) to allow mdb to regain control
3416 * after each callback.
3417 */
3418 int
3419 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3420 {
3421 vmem_walk_t *vw = wsp->walk_data;
3422 vmem_node_t *vp = vw->vw_current;
3423 int rval;
3424
3425 /*
3426 * If this node is marked, then we know that we have already visited
3427 * all of its children. If the node has any siblings, they need to
3428 * be visited next; otherwise, we need to visit the parent. Note
3429 * that vp->vn_marked will only be zero on the first invocation of
3430 * the step function.
3431 */
3432 if (vp->vn_marked) {
3433 if (vp->vn_sibling != NULL)
3434 vp = vp->vn_sibling;
3435 else if (vp->vn_parent != NULL)
3436 vp = vp->vn_parent;
3437 else {
3438 /*
3439 * We have neither a parent, nor a sibling, and we
3440 * have already been visited; we're done.
3441 */
3442 return (WALK_DONE);
3443 }
3444 }
3445
3446 /*
3447 * Before we visit this node, visit its children.
3448 */
3449 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3450 vp = vp->vn_children;
3451
3452 vp->vn_marked = 1;
3453 vw->vw_current = vp;
3454 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3455
3456 return (rval);
3457 }
3458
3459 void
3460 vmem_walk_fini(mdb_walk_state_t *wsp)
3461 {
3462 vmem_walk_t *vw = wsp->walk_data;
3463 vmem_node_t *root = vw->vw_root;
3464 int done;
3465
3466 if (root == NULL)
3467 return;
3468
3469 if ((vw->vw_root = root->vn_children) != NULL)
3470 vmem_walk_fini(wsp);
3471
3472 vw->vw_root = root->vn_sibling;
3473 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3474 mdb_free(root, sizeof (vmem_node_t));
3475
3476 if (done) {
3477 mdb_free(vw, sizeof (vmem_walk_t));
3478 } else {
3479 vmem_walk_fini(wsp);
3480 }
3481 }
3482
3483 typedef struct vmem_seg_walk {
3484 uint8_t vsw_type;
3485 uintptr_t vsw_start;
3486 uintptr_t vsw_current;
3487 } vmem_seg_walk_t;
3488
3489 /*ARGSUSED*/
3490 int
3491 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3492 {
3493 vmem_seg_walk_t *vsw;
3494
3495 if (wsp->walk_addr == NULL) {
3496 mdb_warn("vmem_%s does not support global walks\n", name);
3497 return (WALK_ERR);
3498 }
3499
3500 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3501
3502 vsw->vsw_type = type;
3503 vsw->vsw_start = wsp->walk_addr + offsetof(vmem_t, vm_seg0);
3504 vsw->vsw_current = vsw->vsw_start;
3505
3506 return (WALK_NEXT);
3507 }
3508
3509 /*
3510 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3511 */
3512 #define VMEM_NONE 0
3513
3514 int
3515 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3516 {
3517 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3518 }
3519
3520 int
3521 vmem_free_walk_init(mdb_walk_state_t *wsp)
3522 {
3523 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3524 }
3525
3526 int
3527 vmem_span_walk_init(mdb_walk_state_t *wsp)
3528 {
3529 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3530 }
3531
3532 int
3533 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3534 {
3535 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3536 }
3537
3538 int
3539 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3540 {
3541 vmem_seg_t seg;
3542 vmem_seg_walk_t *vsw = wsp->walk_data;
3543 uintptr_t addr = vsw->vsw_current;
3544 static size_t seg_size = 0;
3545 int rval;
3546
3547 if (!seg_size) {
3548 if (mdb_readvar(&seg_size, "vmem_seg_size") == -1) {
3549 mdb_warn("failed to read 'vmem_seg_size'");
3550 seg_size = sizeof (vmem_seg_t);
3551 }
3552 }
3553
3554 if (seg_size < sizeof (seg))
3555 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3556
3557 if (mdb_vread(&seg, seg_size, addr) == -1) {
3558 mdb_warn("couldn't read vmem_seg at %p", addr);
3559 return (WALK_ERR);
3560 }
3561
3562 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3563 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3564 rval = WALK_NEXT;
3565 } else {
3566 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3567 }
3568
3569 if (vsw->vsw_current == vsw->vsw_start)
3570 return (WALK_DONE);
3571
3572 return (rval);
3573 }
3574
3575 void
3576 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3577 {
3578 vmem_seg_walk_t *vsw = wsp->walk_data;
3579
3580 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3581 }
3582
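/*
 * ::vmem prints one line per arena; the name is indented two spaces per
 * level of vm_source ancestry.  Usage sketch (the address is a placeholder):
 *
 *	> ::vmem			all arenas, in hierarchy order
 *	> <arena addr>::vmem		a single arena
 */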
3583 #define VMEM_NAMEWIDTH 22
3584
3585 int
3586 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3587 {
3588 vmem_t v, parent;
3589 vmem_kstat_t *vkp = &v.vm_kstat;
3590 uintptr_t paddr;
3591 int ident = 0;
3592 char c[VMEM_NAMEWIDTH];
3593
3594 if (!(flags & DCMD_ADDRSPEC)) {
3595 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3596 mdb_warn("can't walk vmem");
3597 return (DCMD_ERR);
3598 }
3599 return (DCMD_OK);
3600 }
3601
3602 if (DCMD_HDRSPEC(flags))
3603 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3604 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3605 "TOTAL", "SUCCEED", "FAIL");
3606
3607 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3608 mdb_warn("couldn't read vmem at %p", addr);
3609 return (DCMD_ERR);
3610 }
3611
3612 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3613 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3614 mdb_warn("couldn't trace %p's ancestry", addr);
3615 ident = 0;
3616 break;
3617 }
3618 paddr = (uintptr_t)parent.vm_source;
3619 }
3620
3621 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3622
3623 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3624 addr, VMEM_NAMEWIDTH, c,
3625 vkp->vk_mem_inuse.value.ui64, vkp->vk_mem_total.value.ui64,
3626 vkp->vk_alloc.value.ui64, vkp->vk_fail.value.ui64);
3627
3628 return (DCMD_OK);
3629 }
3630
3631 void
3632 vmem_seg_help(void)
3633 {
3634 mdb_printf("%s",
3635 "Display the contents of vmem_seg_ts, with optional filtering.\n\n"
3636 "\n"
3637 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3638 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3639 "information.\n");
3640 mdb_dec_indent(2);
3641 mdb_printf("%<b>OPTIONS%</b>\n");
3642 mdb_inc_indent(2);
3643 mdb_printf("%s",
3644 " -v Display the full content of the vmem_seg, including its stack trace\n"
3645 " -s report the size of the segment, instead of the end address\n"
3646 " -c caller\n"
3647 " filter out segments without the function/PC in their stack trace\n"
3648 " -e earliest\n"
3649 " filter out segments timestamped before earliest\n"
3650 " -l latest\n"
3651 " filter out segments timestamped after latest\n"
3652 " -m minsize\n"
3653 " filter out segments smaller than minsize\n"
3654 " -M maxsize\n"
3655 " filter out segments larger than maxsize\n"
3656 " -t thread\n"
3657 " filter out segments not involving thread\n"
3658 " -T type\n"
3659 " filter out segments not of type 'type'\n"
3660 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3661 }
3662
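/*
 * Example ::vmem_seg pipelines (illustrative; addresses are placeholders and
 * the walker names are the ones this module registers):
 *
 *	> <arena addr>::walk vmem_alloc | ::vmem_seg -v
 *	> <arena addr>::walk vmem_seg | ::vmem_seg -T SPAN -s
 */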
3663 /*ARGSUSED*/
3664 int
3665 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3666 {
3667 vmem_seg_t vs;
3668 pc_t *stk = vs.vs_stack;
3669 uintptr_t sz;
3670 uint8_t t;
3671 const char *type = NULL;
3672 GElf_Sym sym;
3673 char c[MDB_SYM_NAMLEN];
3674 int no_debug;
3675 int i;
3676 int depth;
3677 uintptr_t laddr, haddr;
3678
3679 uintptr_t caller = NULL, thread = NULL;
3680 uintptr_t minsize = 0, maxsize = 0;
3681
3682 hrtime_t earliest = 0, latest = 0;
3683
3684 uint_t size = 0;
3685 uint_t verbose = 0;
3686
3687 if (!(flags & DCMD_ADDRSPEC))
3688 return (DCMD_USAGE);
3689
3690 if (mdb_getopts(argc, argv,
3691 'c', MDB_OPT_UINTPTR, &caller,
3692 'e', MDB_OPT_UINT64, &earliest,
3693 'l', MDB_OPT_UINT64, &latest,
3694 's', MDB_OPT_SETBITS, TRUE, &size,
3695 'm', MDB_OPT_UINTPTR, &minsize,
3696 'M', MDB_OPT_UINTPTR, &maxsize,
3697 't', MDB_OPT_UINTPTR, &thread,
3698 'T', MDB_OPT_STR, &type,
3699 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3700 NULL) != argc)
3701 return (DCMD_USAGE);
3702
3703 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3704 if (verbose) {
3705 mdb_printf("%16s %4s %16s %16s %16s\n"
3706 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3707 "ADDR", "TYPE", "START", "END", "SIZE",
3708 "", "", "THREAD", "TIMESTAMP", "");
3709 } else {
3710 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3711 "START", size? "SIZE" : "END", "WHO");
3712 }
3713 }
3714
3715 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3716 mdb_warn("couldn't read vmem_seg at %p", addr);
3717 return (DCMD_ERR);
3718 }
3719
3720 if (type != NULL) {
3721 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3722 t = VMEM_ALLOC;
3723 else if (strcmp(type, "FREE") == 0)
3724 t = VMEM_FREE;
3725 else if (strcmp(type, "SPAN") == 0)
3726 t = VMEM_SPAN;
3727 else if (strcmp(type, "ROTR") == 0 ||
3728 strcmp(type, "ROTOR") == 0)
3729 t = VMEM_ROTOR;
3730 else if (strcmp(type, "WLKR") == 0 ||
3731 strcmp(type, "WALKER") == 0)
3732 t = VMEM_WALKER;
3733 else {
3734 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3735 type);
3736 return (DCMD_ERR);
3737 }
3738
3739 if (vs.vs_type != t)
3740 return (DCMD_OK);
3741 }
3742
3743 sz = vs.vs_end - vs.vs_start;
3744
3745 if (minsize != 0 && sz < minsize)
3746 return (DCMD_OK);
3747
3748 if (maxsize != 0 && sz > maxsize)
3749 return (DCMD_OK);
3750
3751 t = vs.vs_type;
3752 depth = vs.vs_depth;
3753
3754 /*
3755 * debug info, when present, is only accurate for VMEM_ALLOC segments
3756 */
3757 no_debug = (t != VMEM_ALLOC) ||
3758 (depth == 0 || depth > VMEM_STACK_DEPTH);
3759
3760 if (no_debug) {
3761 if (caller != NULL || thread != NULL || earliest != 0 ||
3762 latest != 0)
3763 return (DCMD_OK); /* not enough info */
3764 } else {
3765 if (caller != NULL) {
3766 laddr = caller;
3767 haddr = caller + sizeof (caller);
3768
3769 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3770 sizeof (c), &sym) != -1 &&
3771 caller == (uintptr_t)sym.st_value) {
3772 /*
3773 * We were provided an exact symbol value; any
3774 * address in the function is valid.
3775 */
3776 laddr = (uintptr_t)sym.st_value;
3777 haddr = (uintptr_t)sym.st_value + sym.st_size;
3778 }
3779
3780 for (i = 0; i < depth; i++)
3781 if (vs.vs_stack[i] >= laddr &&
3782 vs.vs_stack[i] < haddr)
3783 break;
3784
3785 if (i == depth)
3786 return (DCMD_OK);
3787 }
3788
3789 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3790 return (DCMD_OK);
3791
3792 if (earliest != 0 && vs.vs_timestamp < earliest)
3793 return (DCMD_OK);
3794
3795 if (latest != 0 && vs.vs_timestamp > latest)
3796 return (DCMD_OK);
3797 }
3798
3799 type = (t == VMEM_ALLOC ? "ALLC" :
3800 t == VMEM_FREE ? "FREE" :
3801 t == VMEM_SPAN ? "SPAN" :
3802 t == VMEM_ROTOR ? "ROTR" :
3803 t == VMEM_WALKER ? "WLKR" :
3804 "????");
3805
3806 if (flags & DCMD_PIPE_OUT) {
3807 mdb_printf("%#lr\n", addr);
3808 return (DCMD_OK);
3809 }
3810
3811 if (verbose) {
3812 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3813 addr, type, vs.vs_start, vs.vs_end, sz);
3814
3815 if (no_debug)
3816 return (DCMD_OK);
3817
3818 mdb_printf("%16s %4s %16p %16llx\n",
3819 "", "", vs.vs_thread, vs.vs_timestamp);
3820
3821 mdb_inc_indent(17);
3822 for (i = 0; i < depth; i++) {
3823 mdb_printf("%a\n", stk[i]);
3824 }
3825 mdb_dec_indent(17);
3826 mdb_printf("\n");
3827 } else {
3828 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3829 vs.vs_start, size? sz : vs.vs_end);
3830
3831 if (no_debug) {
3832 mdb_printf("\n");
3833 return (DCMD_OK);
3834 }
3835
3836 for (i = 0; i < depth; i++) {
3837 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3838 c, sizeof (c), &sym) == -1)
3839 continue;
3840 if (strncmp(c, "vmem_", 5) == 0)
3841 continue;
3842 break;
3843 }
3844 mdb_printf(" %a\n", stk[i]);
3845 }
3846 return (DCMD_OK);
3847 }
3848
3849 typedef struct kmalog_data {
3850 uintptr_t kma_addr;
3851 hrtime_t kma_newest;
3852 } kmalog_data_t;
3853
3854 /*ARGSUSED*/
3855 static int
3856 showbc(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmalog_data_t *kma)
3857 {
3858 char name[KMEM_CACHE_NAMELEN + 1];
3859 hrtime_t delta;
3860 int i, depth;
3861 size_t bufsize;
3862
3863 if (bcp->bc_timestamp == 0)
3864 return (WALK_DONE);
3865
3866 if (kma->kma_newest == 0)
3867 kma->kma_newest = bcp->bc_timestamp;
3868
3869 if (kma->kma_addr) {
3870 if (mdb_vread(&bufsize, sizeof (bufsize),
3871 (uintptr_t)&bcp->bc_cache->cache_bufsize) == -1) {
3872 mdb_warn(
3873 "failed to read cache_bufsize for cache at %p",
3874 bcp->bc_cache);
3875 return (WALK_ERR);
3876 }
3877
3878 if (kma->kma_addr < (uintptr_t)bcp->bc_addr ||
3879 kma->kma_addr >= (uintptr_t)bcp->bc_addr + bufsize)
3880 return (WALK_NEXT);
3881 }
3882
3883 delta = kma->kma_newest - bcp->bc_timestamp;
3884 depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
3885
3886 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3887 &bcp->bc_cache->cache_name) <= 0)
3888 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3889
3890 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3891 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3892
3893 for (i = 0; i < depth; i++)
3894 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3895
3896 return (WALK_NEXT);
3897 }
3898
3899 int
3900 kmalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3901 {
3902 const char *logname = "kmem_transaction_log";
3903 kmalog_data_t kma;
3904
3905 if (argc > 1)
3906 return (DCMD_USAGE);
3907
3908 kma.kma_newest = 0;
3909 if (flags & DCMD_ADDRSPEC)
3910 kma.kma_addr = addr;
3911 else
3912 kma.kma_addr = NULL;
3913
3914 if (argc > 0) {
3915 if (argv->a_type != MDB_TYPE_STRING)
3916 return (DCMD_USAGE);
3917 if (strcmp(argv->a_un.a_str, "fail") == 0)
3918 logname = "kmem_failure_log";
3919 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3920 logname = "kmem_slab_log";
3921 else
3922 return (DCMD_USAGE);
3923 }
3924
3925 if (mdb_readvar(&addr, logname) == -1) {
3926 mdb_warn("failed to read %s log header pointer", logname);
3927 return (DCMD_ERR);
3928 }
3929
3930 if (mdb_pwalk("kmem_log", (mdb_walk_cb_t)showbc, &kma, addr) == -1) {
3931 mdb_warn("failed to walk kmem log");
3932 return (DCMD_ERR);
3933 }
3934
3935 return (DCMD_OK);
3936 }
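/*
 * Example invocations of ::kmalog (the transcript below is hypothetical;
 * record formatting matches showbc() above -- age relative to the newest
 * record, buffer address, cache name, then the saved stack):
 *
 *	> ::kmalog                      (walk kmem_transaction_log)
 *	> ::kmalog fail                 (walk kmem_failure_log)
 *	> ::kmalog slab                 (walk kmem_slab_log)
 *	> fffff01e8a9c2000::kmalog      (only records covering that address)
 *
 *	T-0.000451234 addr=fffff01e8a9c2000 kmem_alloc_4096
 *		kmem_cache_alloc+0xdd
 *		kmem_alloc+0x4b
 *		...
 */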
3937
3938 /*
3939 * As the final lure for die-hard crash(1M) users, we provide ::kmausers here.
3940 * The first piece is a structure which we use to accumulate kmem_cache_t
3941 * addresses of interest. kmc_add() is used as a callback for the kmem_cache
3942 * walker; we either add all caches, or ones named explicitly as arguments.
3943 */
3944
3945 typedef struct kmclist {
3946 const char *kmc_name; /* Name to match (or NULL) */
3947 uintptr_t *kmc_caches; /* List of kmem_cache_t addrs */
3948 int kmc_nelems; /* Num entries in kmc_caches */
3949 int kmc_size; /* Size of kmc_caches array */
3950 } kmclist_t;
3951
3952 static int
3953 kmc_add(uintptr_t addr, const kmem_cache_t *cp, kmclist_t *kmc)
3954 {
3955 void *p;
3956 int s;
3957
3958 if (kmc->kmc_name == NULL ||
3959 strcmp(cp->cache_name, kmc->kmc_name) == 0) {
3960 /*
3961 * If we have a match, grow our array (if necessary), and then
3962 * add the virtual address of the matching cache to our list.
3963 */
3964 if (kmc->kmc_nelems >= kmc->kmc_size) {
3965 s = kmc->kmc_size ? kmc->kmc_size * 2 : 256;
3966 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3967
3968 bcopy(kmc->kmc_caches, p,
3969 sizeof (uintptr_t) * kmc->kmc_size);
3970
3971 kmc->kmc_caches = p;
3972 kmc->kmc_size = s;
3973 }
3974
3975 kmc->kmc_caches[kmc->kmc_nelems++] = addr;
3976 return (kmc->kmc_name ? WALK_DONE : WALK_NEXT);
3977 }
3978
3979 return (WALK_NEXT);
3980 }
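/*
 * A minimal sketch of how kmc_add() is driven (this mirrors what kmausers()
 * does below; the cache name is only an example):
 *
 *	kmclist_t kmc;
 *
 *	bzero(&kmc, sizeof (kmc));
 *	kmc.kmc_name = "kmem_alloc_32";		(or NULL to collect every cache)
 *	(void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
 *
 * Afterward kmc.kmc_caches[0 .. kmc.kmc_nelems - 1] hold the addresses of
 * the matching caches; the array is UM_GC memory, so it is reclaimed
 * automatically when the current dcmd completes.
 */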
3981
3982 /*
3983 * The second piece of ::kmausers is a hash table of allocations. Each
3984 * allocation owner is identified by its stack trace and data_size. We then
3985 * track the total bytes of all such allocations, and the number of allocations
3986 * to report at the end. Once we have a list of caches, we walk through the
3987 * allocated bufctls of each, and update our hash table accordingly.
3988 */
3989
3990 typedef struct kmowner {
3991 struct kmowner *kmo_head; /* First hash elt in bucket */
3992 struct kmowner *kmo_next; /* Next hash elt in chain */
3993 size_t kmo_signature; /* Hash table signature */
3994 uint_t kmo_num; /* Number of allocations */
3995 size_t kmo_data_size; /* Size of each allocation */
3996 size_t kmo_total_size; /* Total bytes of allocation */
3997 int kmo_depth; /* Depth of stack trace */
3998 uintptr_t kmo_stack[KMEM_STACK_DEPTH]; /* Stack trace */
3999 } kmowner_t;
4000
4001 typedef struct kmusers {
4002 uintptr_t kmu_addr; /* address of interest */
4003 const kmem_cache_t *kmu_cache; /* Current kmem cache */
4004 kmowner_t *kmu_hash; /* Hash table of owners */
4005 int kmu_nelems; /* Number of entries in use */
4006 int kmu_size; /* Total number of entries */
4007 } kmusers_t;
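/*
 * A sketch of the owner lookup that kmu_add() performs below. kmu_size is
 * kept a power of two (it starts at 1024 and doubles), so masking with
 * (kmu_size - 1) is a cheap modulus:
 *
 *	signature = data_size + bc_stack[0] + ... + bc_stack[depth - 1];
 *	bucket = signature & (kmu_size - 1);
 *
 * Hashing by signature only narrows the search; an exact match additionally
 * requires identical data_size, depth, and stack frames.
 */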
4008
4009 static void
4010 kmu_add(kmusers_t *kmu, const kmem_bufctl_audit_t *bcp,
4011 size_t size, size_t data_size)
4012 {
4013 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4014 size_t bucket, signature = data_size;
4015 kmowner_t *kmo, *kmoend;
4016
4017 /*
4018 * If the hash table is full, double its size and rehash everything.
4019 */
4020 if (kmu->kmu_nelems >= kmu->kmu_size) {
4021 int s = kmu->kmu_size ? kmu->kmu_size * 2 : 1024;
4022
4023 kmo = mdb_alloc(sizeof (kmowner_t) * s, UM_SLEEP | UM_GC);
4024 bcopy(kmu->kmu_hash, kmo, sizeof (kmowner_t) * kmu->kmu_size);
4025 kmu->kmu_hash = kmo;
4026 kmu->kmu_size = s;
4027
4028 kmoend = kmu->kmu_hash + kmu->kmu_size;
4029 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++)
4030 kmo->kmo_head = NULL;
4031
4032 kmoend = kmu->kmu_hash + kmu->kmu_nelems;
4033 for (kmo = kmu->kmu_hash; kmo < kmoend; kmo++) {
4034 bucket = kmo->kmo_signature & (kmu->kmu_size - 1);
4035 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4036 kmu->kmu_hash[bucket].kmo_head = kmo;
4037 }
4038 }
4039
4040 /*
4041 * Finish computing the hash signature from the stack trace, and then
4042 * see if the owner is in the hash table. If so, update our stats.
4043 */
4044 for (i = 0; i < depth; i++)
4045 signature += bcp->bc_stack[i];
4046
4047 bucket = signature & (kmu->kmu_size - 1);
4048
4049 for (kmo = kmu->kmu_hash[bucket].kmo_head; kmo; kmo = kmo->kmo_next) {
4050 if (kmo->kmo_signature == signature) {
4051 size_t difference = 0;
4052
4053 difference |= kmo->kmo_data_size - data_size;
4054 difference |= kmo->kmo_depth - depth;
4055
4056 for (i = 0; i < depth; i++) {
4057 difference |= kmo->kmo_stack[i] -
4058 bcp->bc_stack[i];
4059 }
4060
4061 if (difference == 0) {
4062 kmo->kmo_total_size += size;
4063 kmo->kmo_num++;
4064 return;
4065 }
4066 }
4067 }
4068
4069 /*
4070 * If the owner is not yet hashed, grab the next element and fill it
4071 * in based on the allocation information.
4072 */
4073 kmo = &kmu->kmu_hash[kmu->kmu_nelems++];
4074 kmo->kmo_next = kmu->kmu_hash[bucket].kmo_head;
4075 kmu->kmu_hash[bucket].kmo_head = kmo;
4076
4077 kmo->kmo_signature = signature;
4078 kmo->kmo_num = 1;
4079 kmo->kmo_data_size = data_size;
4080 kmo->kmo_total_size = size;
4081 kmo->kmo_depth = depth;
4082
4083 for (i = 0; i < depth; i++)
4084 kmo->kmo_stack[i] = bcp->bc_stack[i];
4085 }
4086
4087 /*
4088 * When ::kmausers is invoked without the -f flag, we simply update our hash
4089 * table with the information from each allocated bufctl.
4090 */
4091 /*ARGSUSED*/
4092 static int
4093 kmause1(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4094 {
4095 const kmem_cache_t *cp = kmu->kmu_cache;
4096
4097 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4098 return (WALK_NEXT);
4099 }
4100
4101 /*
4102 * When ::kmausers is invoked with the -f flag, we print out the information
4103 * for each bufctl as well as updating the hash table.
4104 */
4105 static int
4106 kmause2(uintptr_t addr, const kmem_bufctl_audit_t *bcp, kmusers_t *kmu)
4107 {
4108 int i, depth = MIN(bcp->bc_depth, KMEM_STACK_DEPTH);
4109 const kmem_cache_t *cp = kmu->kmu_cache;
4110 kmem_bufctl_t bufctl;
4111
4112 if (kmu->kmu_addr) {
4113 if (mdb_vread(&bufctl, sizeof (bufctl), addr) == -1)
4114 mdb_warn("couldn't read bufctl at %p", addr);
4115 else if (kmu->kmu_addr < (uintptr_t)bufctl.bc_addr ||
4116 kmu->kmu_addr >= (uintptr_t)bufctl.bc_addr +
4117 cp->cache_bufsize)
4118 return (WALK_NEXT);
4119 }
4120
4121 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
4122 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
4123
4124 for (i = 0; i < depth; i++)
4125 mdb_printf("\t %a\n", bcp->bc_stack[i]);
4126
4127 kmu_add(kmu, bcp, cp->cache_bufsize, cp->cache_bufsize);
4128 return (WALK_NEXT);
4129 }
4130
4131 /*
4132 * We sort our results by total size (largest first) before printing them.
4133 */
4134 static int
4135 kmownercmp(const void *lp, const void *rp)
4136 {
4137 const kmowner_t *lhs = lp;
4138 const kmowner_t *rhs = rp;
4139
4140 /* a size_t difference would be truncated to int; compare explicitly */
if (lhs->kmo_total_size < rhs->kmo_total_size)
return (1);
if (lhs->kmo_total_size > rhs->kmo_total_size)
return (-1);
return (0);
4141 }
4142
4143 /*
4144 * The main engine of ::kmausers is relatively straightforward: First we
4145 * accumulate our list of kmem_cache_t addresses into the kmclist_t. Next we
4146 * iterate over the allocated bufctls of each cache in the list. Finally,
4147 * we sort and print our results.
4148 */
4149 /*ARGSUSED*/
4150 int
4151 kmausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4152 {
4153 int mem_threshold = 8192; /* Minimum # bytes for printing */
4154 int cnt_threshold = 100; /* Minimum # blocks for printing */
4155 int audited_caches = 0; /* Number of KMF_AUDIT caches found */
4156 int do_all_caches = 1; /* Do all caches (no arguments) */
4157 int opt_e = FALSE; /* Include "small" users */
4158 int opt_f = FALSE; /* Print stack traces */
4159
4160 mdb_walk_cb_t callback = (mdb_walk_cb_t)kmause1;
4161 kmowner_t *kmo, *kmoend;
4162 int i, oelems;
4163
4164 kmclist_t kmc;
4165 kmusers_t kmu;
4166
4167 bzero(&kmc, sizeof (kmc));
4168 bzero(&kmu, sizeof (kmu));
4169
4170 while ((i = mdb_getopts(argc, argv,
4171 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
4172 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
4173
4174 argv += i; /* skip past options we just processed */
4175 argc -= i; /* adjust argc */
4176
4177 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
4178 return (DCMD_USAGE);
4179
4180 oelems = kmc.kmc_nelems;
4181 kmc.kmc_name = argv->a_un.a_str;
4182 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4183
4184 if (kmc.kmc_nelems == oelems) {
4185 mdb_warn("unknown kmem cache: %s\n", kmc.kmc_name);
4186 return (DCMD_ERR);
4187 }
4188
4189 do_all_caches = 0;
4190 argv++;
4191 argc--;
4192 }
4193
4194 if (flags & DCMD_ADDRSPEC) {
4195 opt_f = TRUE;
4196 kmu.kmu_addr = addr;
4197 } else {
4198 kmu.kmu_addr = NULL;
4199 }
4200
4201 if (opt_e)
4202 mem_threshold = cnt_threshold = 0;
4203
4204 if (opt_f)
4205 callback = (mdb_walk_cb_t)kmause2;
4206
4207 if (do_all_caches) {
4208 kmc.kmc_name = NULL; /* match all cache names */
4209 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmc_add, &kmc);
4210 }
4211
4212 for (i = 0; i < kmc.kmc_nelems; i++) {
4213 uintptr_t cp = kmc.kmc_caches[i];
4214 kmem_cache_t c;
4215
4216 if (mdb_vread(&c, sizeof (c), cp) == -1) {
4217 mdb_warn("failed to read cache at %p", cp);
4218 continue;
4219 }
4220
4221 if (!(c.cache_flags & KMF_AUDIT)) {
4222 if (!do_all_caches) {
4223 mdb_warn("KMF_AUDIT is not enabled for %s\n",
4224 c.cache_name);
4225 }
4226 continue;
4227 }
4228
4229 kmu.kmu_cache = &c;
4230 (void) mdb_pwalk("bufctl", callback, &kmu, cp);
4231 audited_caches++;
4232 }
4233
4234 if (audited_caches == 0 && do_all_caches) {
4235 mdb_warn("KMF_AUDIT is not enabled for any caches\n");
4236 return (DCMD_ERR);
4237 }
4238
4239 qsort(kmu.kmu_hash, kmu.kmu_nelems, sizeof (kmowner_t), kmownercmp);
4240 kmoend = kmu.kmu_hash + kmu.kmu_nelems;
4241
4242 for (kmo = kmu.kmu_hash; kmo < kmoend; kmo++) {
4243 if (kmo->kmo_total_size < mem_threshold &&
4244 kmo->kmo_num < cnt_threshold)
4245 continue;
4246 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
4247 kmo->kmo_total_size, kmo->kmo_num, kmo->kmo_data_size);
4248 for (i = 0; i < kmo->kmo_depth; i++)
4249 mdb_printf("\t %a\n", kmo->kmo_stack[i]);
4250 }
4251
4252 return (DCMD_OK);
4253 }
4254
4255 void
4256 kmausers_help(void)
4257 {
4258 mdb_printf(
4259 "Displays the largest users of the kmem allocator, sorted by total\n"
4260 "allocation size and grouped by stack trace. If one or more caches\n"
4261 "are specified, only those caches are searched; by default, all\n"
4262 "caches are searched. If an address is specified, only those\n"
4263 "allocations which include the given address are displayed.\n"
4264 "Specifying an address implies -f.\n"
4265 "\n"
4266 "\t-e\tInclude all users, not just the largest\n"
4267 "\t-f\tDisplay individual allocations. By default, users are\n"
4268 "\t\tgrouped by stack\n");
4269 }
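/*
 * A hypothetical ::kmausers session (the summary lines follow the format
 * printed at the end of kmausers() above; the stack frames are made up):
 *
 *	> ::kmausers kmem_alloc_1152
 *	9216 bytes for 8 allocations with data size 1152:
 *		kobj_alloc+0x30
 *		kobj_open_file+0x9c
 *		...
 *
 *	> fffff01e8a9c2000::kmausers    (implies -f: each matching bufctl is
 *	                                 printed along with the summary)
 */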
4270
4271 static int
4272 kmem_ready_check(void)
4273 {
4274 int ready;
4275
4276 if (mdb_readvar(&ready, "kmem_ready") < 0)
4277 return (-1); /* errno is set for us */
4278
4279 return (ready);
4280 }
4281
4282 void
4283 kmem_statechange(void)
4284 {
4285 static int been_ready = 0;
4286
4287 if (been_ready)
4288 return;
4289
4290 if (kmem_ready_check() <= 0)
4291 return;
4292
4293 been_ready = 1;
4294 (void) mdb_walk("kmem_cache", (mdb_walk_cb_t)kmem_init_walkers, NULL);
4295 }
4296
4297 void
4298 kmem_init(void)
4299 {
4300 mdb_walker_t w = {
4301 "kmem_cache", "walk list of kmem caches", kmem_cache_walk_init,
4302 list_walk_step, list_walk_fini
4303 };
4304
4305 /*
4306 * If kmem is ready, we'll need to invoke the kmem_cache walker
4307 * immediately. Walkers in the linkage structure won't be ready until
4308 * _mdb_init returns, so we'll need to add this one manually. If kmem
4309 * is ready, we'll use the walker to initialize the caches. If kmem
4310 * isn't ready, we'll register a callback that will allow us to defer
4311 * cache walking until it is.
4312 */
4313 if (mdb_add_walker(&w) != 0) {
4314 mdb_warn("failed to add kmem_cache walker");
4315 return;
4316 }
4317
4318 kmem_statechange();
4319
4320 /* register our ::whatis handlers */
4321 mdb_whatis_register("modules", whatis_run_modules, NULL,
4322 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4323 mdb_whatis_register("threads", whatis_run_threads, NULL,
4324 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4325 mdb_whatis_register("pages", whatis_run_pages, NULL,
4326 WHATIS_PRIO_EARLY, WHATIS_REG_NO_ID);
4327 mdb_whatis_register("kmem", whatis_run_kmem, NULL,
4328 WHATIS_PRIO_ALLOCATOR, 0);
4329 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
4330 WHATIS_PRIO_ALLOCATOR, 0);
4331 }
4332
4333 typedef struct whatthread {
4334 uintptr_t wt_target;
4335 int wt_verbose;
4336 } whatthread_t;
4337
4338 static int
4339 whatthread_walk_thread(uintptr_t addr, const kthread_t *t, whatthread_t *w)
4340 {
4341 uintptr_t current, data;
4342
4343 if (t->t_stkbase == NULL)
4344 return (WALK_NEXT);
4345
4346 /*
4347 * Warn about swapped out threads, but drive on anyway
4348 */
4349 if (!(t->t_schedflag & TS_LOAD)) {
4350 mdb_warn("thread %p's stack swapped out\n", addr);
4351 return (WALK_NEXT);
4352 }
4353
4354 /*
4355 * Search the thread's stack for the given pointer. Note that it would
4356 * be more efficient to follow ::kgrep's lead and read in page-sized
4357 * chunks, but this routine is already fast and simple.
4358 */
4359 for (current = (uintptr_t)t->t_stkbase; current < (uintptr_t)t->t_stk;
4360 current += sizeof (uintptr_t)) {
4361 if (mdb_vread(&data, sizeof (data), current) == -1) {
4362 mdb_warn("couldn't read thread %p's stack at %p",
4363 addr, current);
4364 return (WALK_ERR);
4365 }
4366
4367 if (data == w->wt_target) {
4368 if (w->wt_verbose) {
4369 mdb_printf("%p in thread %p's stack%s\n",
4370 current, addr, stack_active(t, current));
4371 } else {
4372 mdb_printf("%#lr\n", addr);
4373 return (WALK_NEXT);
4374 }
4375 }
4376 }
4377
4378 return (WALK_NEXT);
4379 }
4380
4381 int
4382 whatthread(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
4383 {
4384 whatthread_t w;
4385
4386 if (!(flags & DCMD_ADDRSPEC))
4387 return (DCMD_USAGE);
4388
4389 w.wt_verbose = FALSE;
4390 w.wt_target = addr;
4391
4392 if (mdb_getopts(argc, argv,
4393 'v', MDB_OPT_SETBITS, TRUE, &w.wt_verbose, NULL) != argc)
4394 return (DCMD_USAGE);
4395
4396 if (mdb_walk("thread", (mdb_walk_cb_t)whatthread_walk_thread, &w)
4397 == -1) {
4398 mdb_warn("couldn't walk threads");
4399 return (DCMD_ERR);
4400 }
4401
4402 return (DCMD_OK);
4403 }
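/*
 * Example usage of ::whatthread (output is hypothetical). Without -v only
 * the matching thread addresses are printed, one per line, in a form that
 * can be piped to other dcmds; with -v each stack location holding the
 * target value is reported:
 *
 *	> fffff01e8a9c2000::whatthread
 *	0xfffff01e93dc0c20
 *
 *	> fffff01e8a9c2000::whatthread -v
 *	fffff00014a59b48 in thread fffff01e93dc0c20's stack
 */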
4404