1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright 2012 Joyent, Inc. All rights reserved.
28 * Copyright (c) 2013 by Delphix. All rights reserved.
29 */
30
31 #include "umem.h"
32
33 #include <sys/vmem_impl_user.h>
34 #include <umem_impl.h>
35
36 #include <alloca.h>
37 #include <limits.h>
38 #include <mdb/mdb_whatis.h>
39 #include <thr_uberdata.h>
40
41 #include "misc.h"
42 #include "leaky.h"
43 #include "dist.h"
44
45 #include "umem_pagesize.h"
46
47 #define UM_ALLOCATED 0x1
48 #define UM_FREE 0x2
49 #define UM_BUFCTL 0x4
50 #define UM_HASH 0x8
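
/*
 * These flags select what a umem walk reports: UM_ALLOCATED and UM_FREE
 * choose between allocated and freed buffers, UM_BUFCTL asks for bufctl
 * addresses rather than buffer addresses, and UM_HASH is set internally
 * when the walk is layered on a cache's hash table.
 */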
51
52 int umem_ready;
53
54 static int umem_stack_depth_warned;
55 static uint32_t umem_max_ncpus;
56 uint32_t umem_stack_depth;
57
58 size_t umem_pagesize;
59
60 #define UMEM_READVAR(var) \
61 (umem_readvar(&(var), #var) == -1 && \
62 (mdb_warn("failed to read "#var), 1))
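
/*
 * UMEM_READVAR(var) reads the target's global variable of the same name
 * into the local 'var', and evaluates to nonzero (after printing a warning)
 * if the read fails, so callers can simply write, for example:
 *
 *	if (UMEM_READVAR(umem_ready))
 *		return (-1);
 */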
63
64 int
65 umem_update_variables(void)
66 {
67 size_t pagesize;
68
69 /*
70 * Figure out which type of umem is being used; if it's not there
71 * yet, succeed quietly.
72 */
73 if (umem_set_standalone() == -1) {
74 umem_ready = 0;
75 return (0); /* umem not there yet */
76 }
77
78 /*
79 * Solaris 9 used a different name for umem_max_ncpus. It's
80 * cheap backwards compatibility to check for both names.
81 */
82 if (umem_readvar(&umem_max_ncpus, "umem_max_ncpus") == -1 &&
83 umem_readvar(&umem_max_ncpus, "max_ncpus") == -1) {
84 mdb_warn("unable to read umem_max_ncpus or max_ncpus");
85 return (-1);
86 }
87 if (UMEM_READVAR(umem_ready))
88 return (-1);
89 if (UMEM_READVAR(umem_stack_depth))
90 return (-1);
91 if (UMEM_READVAR(pagesize))
92 return (-1);
93
94 if (umem_stack_depth > UMEM_MAX_STACK_DEPTH) {
95 if (umem_stack_depth_warned == 0) {
96 mdb_warn("umem_stack_depth corrupted (%d > %d)\n",
97 umem_stack_depth, UMEM_MAX_STACK_DEPTH);
98 umem_stack_depth_warned = 1;
99 }
100 umem_stack_depth = 0;
101 }
102
103 umem_pagesize = pagesize;
104
105 return (0);
106 }
107
108 static int
109 umem_ptc_walk_init(mdb_walk_state_t *wsp)
110 {
111 if (wsp->walk_addr == NULL) {
112 if (mdb_layered_walk("ulwp", wsp) == -1) {
113 mdb_warn("couldn't walk 'ulwp'");
114 return (WALK_ERR);
115 }
116 }
117
118 return (WALK_NEXT);
119 }
120
121 static int
122 umem_ptc_walk_step(mdb_walk_state_t *wsp)
123 {
124 uintptr_t this;
125 int rval;
126
127 if (wsp->walk_layer != NULL) {
128 this = (uintptr_t)((ulwp_t *)wsp->walk_layer)->ul_self +
129 (uintptr_t)wsp->walk_arg;
130 } else {
131 this = wsp->walk_addr + (uintptr_t)wsp->walk_arg;
132 }
133
134 for (;;) {
135 if (mdb_vread(&this, sizeof (void *), this) == -1) {
136 mdb_warn("couldn't read ptc buffer at %p", this);
137 return (WALK_ERR);
138 }
139
140 if (this == NULL)
141 break;
142
143 rval = wsp->walk_callback(this, &this, wsp->walk_cbdata);
144
145 if (rval != WALK_NEXT)
146 return (rval);
147 }
148
149 return (wsp->walk_layer != NULL ? WALK_NEXT : WALK_DONE);
150 }
151
152 /*ARGSUSED*/
153 static int
154 umem_init_walkers(uintptr_t addr, const umem_cache_t *c, int *sizes)
155 {
156 mdb_walker_t w;
157 char descr[64];
158 char name[64];
159 int i;
160
161 (void) mdb_snprintf(descr, sizeof (descr),
162 "walk the %s cache", c->cache_name);
163
164 w.walk_name = c->cache_name;
165 w.walk_descr = descr;
166 w.walk_init = umem_walk_init;
167 w.walk_step = umem_walk_step;
168 w.walk_fini = umem_walk_fini;
169 w.walk_init_arg = (void *)addr;
170
171 if (mdb_add_walker(&w) == -1)
172 mdb_warn("failed to add %s walker", c->cache_name);
173
174 if (!(c->cache_flags & UMF_PTC))
175 return (WALK_NEXT);
176
177 /*
178 * For the per-thread cache walker, the address is the offset in the
179 * tm_roots[] array of the ulwp_t.
180 */
181 for (i = 0; sizes[i] != 0; i++) {
182 if (sizes[i] == c->cache_bufsize)
183 break;
184 }
185
186 if (sizes[i] == 0) {
187 mdb_warn("cache %s is cached per-thread, but could not find "
188 "size in umem_alloc_sizes\n", c->cache_name);
189 return (WALK_NEXT);
190 }
191
192 if (i >= NTMEMBASE) {
193 mdb_warn("index for %s (%d) exceeds root slots (%d)\n",
194 c->cache_name, i, NTMEMBASE);
195 return (WALK_NEXT);
196 }
197
198 (void) mdb_snprintf(name, sizeof (name),
199 "umem_ptc_%d", c->cache_bufsize);
200 (void) mdb_snprintf(descr, sizeof (descr),
201 "walk the per-thread cache for %s", c->cache_name);
202
203 w.walk_name = name;
204 w.walk_descr = descr;
205 w.walk_init = umem_ptc_walk_init;
206 w.walk_step = umem_ptc_walk_step;
207 w.walk_fini = NULL;
208 w.walk_init_arg = (void *)offsetof(ulwp_t, ul_tmem.tm_roots[i]);
209
210 if (mdb_add_walker(&w) == -1)
211 mdb_warn("failed to add %s walker", w.walk_name);
212
213 return (WALK_NEXT);
214 }
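
/*
 * Once umem_init_walkers() has run, every cache gets a walker named after
 * it, and UMF_PTC caches additionally get a per-thread-cache walker named
 * umem_ptc_<bufsize>.  For example (cache names depend on the target's
 * configured allocation sizes, so these are illustrative only):
 *
 *	> ::walk umem_alloc_32
 *	> ::walk umem_ptc_32
 */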
215
216 /*ARGSUSED*/
217 static void
218 umem_statechange_cb(void *arg)
219 {
220 static int been_ready = 0;
221 GElf_Sym sym;
222 int *sizes;
223
224 #ifndef _KMDB
225 leaky_cleanup(1); /* state changes invalidate leaky state */
226 #endif
227
228 if (umem_update_variables() == -1)
229 return;
230
231 if (been_ready)
232 return;
233
234 if (umem_ready != UMEM_READY)
235 return;
236
237 been_ready = 1;
238
239 /*
240 * In order to determine the tm_roots offset of any cache that is
241 * cached per-thread, we need to have the umem_alloc_sizes array.
242 * Read this, assuring that it is zero-terminated.
243 */
244 if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
245 mdb_warn("unable to lookup 'umem_alloc_sizes'");
246 return;
247 }
248
249 sizes = mdb_zalloc(sym.st_size + sizeof (int), UM_SLEEP | UM_GC);
250
251 if (mdb_vread(sizes, sym.st_size, (uintptr_t)sym.st_value) == -1) {
252 mdb_warn("couldn't read 'umem_alloc_sizes'");
253 return;
254 }
255
256 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umem_init_walkers, sizes);
257 }
258
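/*
 * Dump the target's umem_error_buffer.  The buffer is circular: printing
 * starts at offset umem_error_begin and wraps around, and the local copy
 * is NUL-terminated past its end to simplify printing.
 */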
259 int
260 umem_abort_messages(void)
261 {
262 char *umem_error_buffer;
263 uint_t umem_error_begin;
264 GElf_Sym sym;
265 size_t bufsize;
266
267 if (UMEM_READVAR(umem_error_begin))
268 return (DCMD_ERR);
269
270 if (umem_lookup_by_name("umem_error_buffer", &sym) == -1) {
271 mdb_warn("unable to look up umem_error_buffer");
272 return (DCMD_ERR);
273 }
274
275 bufsize = (size_t)sym.st_size;
276
277 umem_error_buffer = mdb_alloc(bufsize+1, UM_SLEEP | UM_GC);
278
279 if (mdb_vread(umem_error_buffer, bufsize, (uintptr_t)sym.st_value)
280 != bufsize) {
281 mdb_warn("unable to read umem_error_buffer");
282 return (DCMD_ERR);
283 }
284 /* put a zero after the end of the buffer to simplify printing */
285 umem_error_buffer[bufsize] = 0;
286
287 if ((umem_error_begin % bufsize) == 0)
288 mdb_printf("%s\n", umem_error_buffer);
289 else {
290 umem_error_buffer[(umem_error_begin % bufsize) - 1] = 0;
291 mdb_printf("%s%s\n",
292 &umem_error_buffer[umem_error_begin % bufsize],
293 umem_error_buffer);
294 }
295
296 return (DCMD_OK);
297 }
298
299 static void
300 umem_log_status(const char *name, umem_log_header_t *val)
301 {
302 umem_log_header_t my_lh;
303 uintptr_t pos = (uintptr_t)val;
304 size_t size;
305
306 if (pos == NULL)
307 return;
308
309 if (mdb_vread(&my_lh, sizeof (umem_log_header_t), pos) == -1) {
310 mdb_warn("\nunable to read umem_%s_log pointer %p",
311 name, pos);
312 return;
313 }
314
315 size = my_lh.lh_chunksize * my_lh.lh_nchunks;
316
317 if (size % (1024 * 1024) == 0)
318 mdb_printf("%s=%dm ", name, size / (1024 * 1024));
319 else if (size % 1024 == 0)
320 mdb_printf("%s=%dk ", name, size / 1024);
321 else
322 mdb_printf("%s=%d ", name, size);
323 }
324
325 typedef struct umem_debug_flags {
326 const char *udf_name;
327 uint_t udf_flags;
328 uint_t udf_clear; /* if 0, uses udf_flags */
329 } umem_debug_flags_t;
330
331 umem_debug_flags_t umem_status_flags[] = {
332 { "random", UMF_RANDOMIZE, UMF_RANDOM },
333 { "default", UMF_AUDIT | UMF_DEADBEEF | UMF_REDZONE | UMF_CONTENTS },
334 { "audit", UMF_AUDIT },
335 { "guards", UMF_DEADBEEF | UMF_REDZONE },
336 { "nosignal", UMF_CHECKSIGNAL },
337 { "firewall", UMF_FIREWALL },
338 { "lite", UMF_LITE },
339 { NULL }
340 };
341
342 /*ARGSUSED*/
343 int
344 umem_status(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
345 {
346 int umem_logging;
347
348 umem_log_header_t *umem_transaction_log;
349 umem_log_header_t *umem_content_log;
350 umem_log_header_t *umem_failure_log;
351 umem_log_header_t *umem_slab_log;
352
353 mdb_printf("Status:\t\t%s\n",
354 umem_ready == UMEM_READY_INIT_FAILED ? "initialization failed" :
355 umem_ready == UMEM_READY_STARTUP ? "uninitialized" :
356 umem_ready == UMEM_READY_INITING ? "initialization in process" :
357 umem_ready == UMEM_READY ? "ready and active" :
358 umem_ready == 0 ? "not loaded into address space" :
359 "unknown (umem_ready invalid)");
360
361 if (umem_ready == 0)
362 return (DCMD_OK);
363
364 mdb_printf("Concurrency:\t%d\n", umem_max_ncpus);
365
366 if (UMEM_READVAR(umem_logging))
367 goto err;
368 if (UMEM_READVAR(umem_transaction_log))
369 goto err;
370 if (UMEM_READVAR(umem_content_log))
371 goto err;
372 if (UMEM_READVAR(umem_failure_log))
373 goto err;
374 if (UMEM_READVAR(umem_slab_log))
375 goto err;
376
377 mdb_printf("Logs:\t\t");
378 umem_log_status("transaction", umem_transaction_log);
379 umem_log_status("content", umem_content_log);
380 umem_log_status("fail", umem_failure_log);
381 umem_log_status("slab", umem_slab_log);
382 if (!umem_logging)
383 mdb_printf("(inactive)");
384 mdb_printf("\n");
385
386 mdb_printf("Message buffer:\n");
387 return (umem_abort_messages());
388
389 err:
390 mdb_printf("Message buffer:\n");
391 (void) umem_abort_messages();
392 return (DCMD_ERR);
393 }
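
/*
 * Assuming this dcmd is registered under its function name (::umem_status,
 * as in the standard libumem dmod), a typical invocation looks like the
 * following; the values shown are illustrative only:
 *
 *	> ::umem_status
 *	Status:		ready and active
 *	Concurrency:	4
 *	Logs:		(inactive)
 *	Message buffer:
 */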
394
395 typedef struct {
396 uintptr_t ucw_first;
397 uintptr_t ucw_current;
398 } umem_cache_walk_t;
399
400 int
401 umem_cache_walk_init(mdb_walk_state_t *wsp)
402 {
403 umem_cache_walk_t *ucw;
404 umem_cache_t c;
405 uintptr_t cp;
406 GElf_Sym sym;
407
408 if (umem_lookup_by_name("umem_null_cache", &sym) == -1) {
409 mdb_warn("couldn't find umem_null_cache");
410 return (WALK_ERR);
411 }
412
413 cp = (uintptr_t)sym.st_value;
414
415 if (mdb_vread(&c, sizeof (umem_cache_t), cp) == -1) {
416 mdb_warn("couldn't read cache at %p", cp);
417 return (WALK_ERR);
418 }
419
420 ucw = mdb_alloc(sizeof (umem_cache_walk_t), UM_SLEEP);
421
422 ucw->ucw_first = cp;
423 ucw->ucw_current = (uintptr_t)c.cache_next;
424 wsp->walk_data = ucw;
425
426 return (WALK_NEXT);
427 }
428
429 int
430 umem_cache_walk_step(mdb_walk_state_t *wsp)
431 {
432 umem_cache_walk_t *ucw = wsp->walk_data;
433 umem_cache_t c;
434 int status;
435
436 if (mdb_vread(&c, sizeof (umem_cache_t), ucw->ucw_current) == -1) {
437 mdb_warn("couldn't read cache at %p", ucw->ucw_current);
438 return (WALK_DONE);
439 }
440
441 status = wsp->walk_callback(ucw->ucw_current, &c, wsp->walk_cbdata);
442
443 if ((ucw->ucw_current = (uintptr_t)c.cache_next) == ucw->ucw_first)
444 return (WALK_DONE);
445
446 return (status);
447 }
448
449 void
450 umem_cache_walk_fini(mdb_walk_state_t *wsp)
451 {
452 umem_cache_walk_t *ucw = wsp->walk_data;
453 mdb_free(ucw, sizeof (umem_cache_walk_t));
454 }
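
/*
 * The cache walk above follows the circular list of umem_cache_t structures
 * rooted at umem_null_cache, stopping when cache_next returns to the head.
 * The code below uses it both directly and piped into the umem_cache dcmd,
 * e.g.:
 *
 *	> ::walk umem_cache | ::umem_cache
 */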
455
456 typedef struct {
457 umem_cpu_t *ucw_cpus;
458 uint32_t ucw_current;
459 uint32_t ucw_max;
460 } umem_cpu_walk_state_t;
461
462 int
463 umem_cpu_walk_init(mdb_walk_state_t *wsp)
464 {
465 umem_cpu_t *umem_cpus;
466
467 umem_cpu_walk_state_t *ucw;
468
469 if (umem_readvar(&umem_cpus, "umem_cpus") == -1) {
470 mdb_warn("failed to read 'umem_cpus'");
471 return (WALK_ERR);
472 }
473
474 ucw = mdb_alloc(sizeof (*ucw), UM_SLEEP);
475
476 ucw->ucw_cpus = umem_cpus;
477 ucw->ucw_current = 0;
478 ucw->ucw_max = umem_max_ncpus;
479
480 wsp->walk_data = ucw;
481 return (WALK_NEXT);
482 }
483
484 int
485 umem_cpu_walk_step(mdb_walk_state_t *wsp)
486 {
487 umem_cpu_t cpu;
488 umem_cpu_walk_state_t *ucw = wsp->walk_data;
489
490 uintptr_t caddr;
491
492 if (ucw->ucw_current >= ucw->ucw_max)
493 return (WALK_DONE);
494
495 caddr = (uintptr_t)&(ucw->ucw_cpus[ucw->ucw_current]);
496
497 if (mdb_vread(&cpu, sizeof (umem_cpu_t), caddr) == -1) {
498 mdb_warn("failed to read cpu %d", ucw->ucw_current);
499 return (WALK_ERR);
500 }
501
502 ucw->ucw_current++;
503
504 return (wsp->walk_callback(caddr, &cpu, wsp->walk_cbdata));
505 }
506
507 void
508 umem_cpu_walk_fini(mdb_walk_state_t *wsp)
509 {
510 umem_cpu_walk_state_t *ucw = wsp->walk_data;
511
512 mdb_free(ucw, sizeof (*ucw));
513 }
514
515 int
516 umem_cpu_cache_walk_init(mdb_walk_state_t *wsp)
517 {
518 if (wsp->walk_addr == NULL) {
519 mdb_warn("umem_cpu_cache doesn't support global walks");
520 return (WALK_ERR);
521 }
522
523 if (mdb_layered_walk("umem_cpu", wsp) == -1) {
524 mdb_warn("couldn't walk 'umem_cpu'");
525 return (WALK_ERR);
526 }
527
528 wsp->walk_data = (void *)wsp->walk_addr;
529
530 return (WALK_NEXT);
531 }
532
533 int
534 umem_cpu_cache_walk_step(mdb_walk_state_t *wsp)
535 {
536 uintptr_t caddr = (uintptr_t)wsp->walk_data;
537 const umem_cpu_t *cpu = wsp->walk_layer;
538 umem_cpu_cache_t cc;
539
540 caddr += cpu->cpu_cache_offset;
541
542 if (mdb_vread(&cc, sizeof (umem_cpu_cache_t), caddr) == -1) {
543 mdb_warn("couldn't read umem_cpu_cache at %p", caddr);
544 return (WALK_ERR);
545 }
546
547 return (wsp->walk_callback(caddr, &cc, wsp->walk_cbdata));
548 }
549
550 int
551 umem_slab_walk_init(mdb_walk_state_t *wsp)
552 {
553 uintptr_t caddr = wsp->walk_addr;
554 umem_cache_t c;
555
556 if (caddr == NULL) {
557 mdb_warn("umem_slab doesn't support global walks\n");
558 return (WALK_ERR);
559 }
560
561 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
562 mdb_warn("couldn't read umem_cache at %p", caddr);
563 return (WALK_ERR);
564 }
565
566 wsp->walk_data =
567 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
568 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_next;
569
570 return (WALK_NEXT);
571 }
572
573 int
574 umem_slab_walk_partial_init(mdb_walk_state_t *wsp)
575 {
576 uintptr_t caddr = wsp->walk_addr;
577 umem_cache_t c;
578
579 if (caddr == NULL) {
580 mdb_warn("umem_slab_partial doesn't support global walks\n");
581 return (WALK_ERR);
582 }
583
584 if (mdb_vread(&c, sizeof (c), caddr) == -1) {
585 mdb_warn("couldn't read umem_cache at %p", caddr);
586 return (WALK_ERR);
587 }
588
589 wsp->walk_data =
590 (void *)(caddr + offsetof(umem_cache_t, cache_nullslab));
591 wsp->walk_addr = (uintptr_t)c.cache_freelist;
592
593 /*
594 * Some consumers (umem_walk_step(), in particular) require at
595 * least one callback if there are any buffers in the cache. So
596 * if there are *no* partial slabs, report the last full slab, if
597 * any.
598 *
599 * Yes, this is ugly, but it's cleaner than the other possibilities.
600 */
601 if ((uintptr_t)wsp->walk_data == wsp->walk_addr)
602 wsp->walk_addr = (uintptr_t)c.cache_nullslab.slab_prev;
603
604 return (WALK_NEXT);
605 }
606
607 int
608 umem_slab_walk_step(mdb_walk_state_t *wsp)
609 {
610 umem_slab_t s;
611 uintptr_t addr = wsp->walk_addr;
612 uintptr_t saddr = (uintptr_t)wsp->walk_data;
613 uintptr_t caddr = saddr - offsetof(umem_cache_t, cache_nullslab);
614
615 if (addr == saddr)
616 return (WALK_DONE);
617
618 if (mdb_vread(&s, sizeof (s), addr) == -1) {
619 mdb_warn("failed to read slab at %p", wsp->walk_addr);
620 return (WALK_ERR);
621 }
622
623 if ((uintptr_t)s.slab_cache != caddr) {
624 mdb_warn("slab %p isn't in cache %p (in cache %p)\n",
625 addr, caddr, s.slab_cache);
626 return (WALK_ERR);
627 }
628
629 wsp->walk_addr = (uintptr_t)s.slab_next;
630
631 return (wsp->walk_callback(addr, &s, wsp->walk_cbdata));
632 }
633
634 int
635 umem_cache(uintptr_t addr, uint_t flags, int ac, const mdb_arg_t *argv)
636 {
637 umem_cache_t c;
638
639 if (!(flags & DCMD_ADDRSPEC)) {
640 if (mdb_walk_dcmd("umem_cache", "umem_cache", ac, argv) == -1) {
641 mdb_warn("can't walk umem_cache");
642 return (DCMD_ERR);
643 }
644 return (DCMD_OK);
645 }
646
647 if (DCMD_HDRSPEC(flags))
648 mdb_printf("%-?s %-25s %4s %8s %8s %8s\n", "ADDR", "NAME",
649 "FLAG", "CFLAG", "BUFSIZE", "BUFTOTL");
650
651 if (mdb_vread(&c, sizeof (c), addr) == -1) {
652 mdb_warn("couldn't read umem_cache at %p", addr);
653 return (DCMD_ERR);
654 }
655
656 mdb_printf("%0?p %-25s %04x %08x %8ld %8lld\n", addr, c.cache_name,
657 c.cache_flags, c.cache_cflags, c.cache_bufsize, c.cache_buftotal);
658
659 return (DCMD_OK);
660 }
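
/*
 * Without an address, ::umem_cache walks every cache and prints one line
 * per cache; with an address, it prints just that cache.  The columns are
 * those in the header above: ADDR NAME FLAG CFLAG BUFSIZE BUFTOTL.
 */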
661
662 static int
663 addrcmp(const void *lhs, const void *rhs)
664 {
665 uintptr_t p1 = *((uintptr_t *)lhs);
666 uintptr_t p2 = *((uintptr_t *)rhs);
667
668 if (p1 < p2)
669 return (-1);
670 if (p1 > p2)
671 return (1);
672 return (0);
673 }
674
675 static int
676 bufctlcmp(const umem_bufctl_audit_t **lhs, const umem_bufctl_audit_t **rhs)
677 {
678 const umem_bufctl_audit_t *bcp1 = *lhs;
679 const umem_bufctl_audit_t *bcp2 = *rhs;
680
681 if (bcp1->bc_timestamp > bcp2->bc_timestamp)
682 return (-1);
683
684 if (bcp1->bc_timestamp < bcp2->bc_timestamp)
685 return (1);
686
687 return (0);
688 }
689
690 typedef struct umem_hash_walk {
691 uintptr_t *umhw_table;
692 size_t umhw_nelems;
693 size_t umhw_pos;
694 umem_bufctl_t umhw_cur;
695 } umem_hash_walk_t;
696
697 int
698 umem_hash_walk_init(mdb_walk_state_t *wsp)
699 {
700 umem_hash_walk_t *umhw;
701 uintptr_t *hash;
702 umem_cache_t c;
703 uintptr_t haddr, addr = wsp->walk_addr;
704 size_t nelems;
705 size_t hsize;
706
707 if (addr == NULL) {
708 mdb_warn("umem_hash doesn't support global walks\n");
709 return (WALK_ERR);
710 }
711
712 if (mdb_vread(&c, sizeof (c), addr) == -1) {
713 mdb_warn("couldn't read cache at addr %p", addr);
714 return (WALK_ERR);
715 }
716
717 if (!(c.cache_flags & UMF_HASH)) {
718 mdb_warn("cache %p doesn't have a hash table\n", addr);
719 return (WALK_DONE); /* nothing to do */
720 }
721
722 umhw = mdb_zalloc(sizeof (umem_hash_walk_t), UM_SLEEP);
723 umhw->umhw_cur.bc_next = NULL;
724 umhw->umhw_pos = 0;
725
726 umhw->umhw_nelems = nelems = c.cache_hash_mask + 1;
727 hsize = nelems * sizeof (uintptr_t);
728 haddr = (uintptr_t)c.cache_hash_table;
729
730 umhw->umhw_table = hash = mdb_alloc(hsize, UM_SLEEP);
731 if (mdb_vread(hash, hsize, haddr) == -1) {
732 mdb_warn("failed to read hash table at %p", haddr);
733 mdb_free(hash, hsize);
734 mdb_free(umhw, sizeof (umem_hash_walk_t));
735 return (WALK_ERR);
736 }
737
738 wsp->walk_data = umhw;
739
740 return (WALK_NEXT);
741 }
742
743 int
744 umem_hash_walk_step(mdb_walk_state_t *wsp)
745 {
746 umem_hash_walk_t *umhw = wsp->walk_data;
747 uintptr_t addr = NULL;
748
749 if ((addr = (uintptr_t)umhw->umhw_cur.bc_next) == NULL) {
750 while (umhw->umhw_pos < umhw->umhw_nelems) {
751 if ((addr = umhw->umhw_table[umhw->umhw_pos++]) != NULL)
752 break;
753 }
754 }
755 if (addr == NULL)
756 return (WALK_DONE);
757
758 if (mdb_vread(&umhw->umhw_cur, sizeof (umem_bufctl_t), addr) == -1) {
759 mdb_warn("couldn't read umem_bufctl_t at addr %p", addr);
760 return (WALK_ERR);
761 }
762
763 return (wsp->walk_callback(addr, &umhw->umhw_cur, wsp->walk_cbdata));
764 }
765
766 void
767 umem_hash_walk_fini(mdb_walk_state_t *wsp)
768 {
769 umem_hash_walk_t *umhw = wsp->walk_data;
770
771 if (umhw == NULL)
772 return;
773
774 mdb_free(umhw->umhw_table, umhw->umhw_nelems * sizeof (uintptr_t));
775 mdb_free(umhw, sizeof (umem_hash_walk_t));
776 }
777
778 /*
779 * Find the address of the bufctl structure for the address 'buf' in cache
780 * 'cp', which is at address caddr, and place it in *out.
781 */
782 static int
783 umem_hash_lookup(umem_cache_t *cp, uintptr_t caddr, void *buf, uintptr_t *out)
784 {
785 uintptr_t bucket = (uintptr_t)UMEM_HASH(cp, buf);
786 umem_bufctl_t *bcp;
787 umem_bufctl_t bc;
788
789 if (mdb_vread(&bcp, sizeof (umem_bufctl_t *), bucket) == -1) {
790 mdb_warn("unable to read hash bucket for %p in cache %p",
791 buf, caddr);
792 return (-1);
793 }
794
795 while (bcp != NULL) {
796 if (mdb_vread(&bc, sizeof (umem_bufctl_t),
797 (uintptr_t)bcp) == -1) {
798 mdb_warn("unable to read bufctl at %p", bcp);
799 return (-1);
800 }
801 if (bc.bc_addr == buf) {
802 *out = (uintptr_t)bcp;
803 return (0);
804 }
805 bcp = bc.bc_next;
806 }
807
808 mdb_warn("unable to find bufctl for %p in cache %p\n", buf, caddr);
809 return (-1);
810 }
811
812 int
813 umem_get_magsize(const umem_cache_t *cp)
814 {
815 uintptr_t addr = (uintptr_t)cp->cache_magtype;
816 GElf_Sym mt_sym;
817 umem_magtype_t mt;
818 int res;
819
820 /*
821 * if cpu 0 has a non-zero magsize, it must be correct. caches
822 * with UMF_NOMAGAZINE have disabled their magazine layers, so
823 * it is okay to return 0 for them.
824 */
825 if ((res = cp->cache_cpu[0].cc_magsize) != 0 ||
826 (cp->cache_flags & UMF_NOMAGAZINE))
827 return (res);
828
829 if (umem_lookup_by_name("umem_magtype", &mt_sym) == -1) {
830 mdb_warn("unable to read 'umem_magtype'");
831 } else if (addr < mt_sym.st_value ||
832 addr + sizeof (mt) - 1 > mt_sym.st_value + mt_sym.st_size - 1 ||
833 ((addr - mt_sym.st_value) % sizeof (mt)) != 0) {
834 mdb_warn("cache '%s' has invalid magtype pointer (%p)\n",
835 cp->cache_name, addr);
836 return (0);
837 }
838 if (mdb_vread(&mt, sizeof (mt), addr) == -1) {
839 mdb_warn("unable to read magtype at %a", addr);
840 return (0);
841 }
842 return (mt.mt_magsize);
843 }
844
845 /*ARGSUSED*/
846 static int
847 umem_estimate_slab(uintptr_t addr, const umem_slab_t *sp, size_t *est)
848 {
849 *est -= (sp->slab_chunks - sp->slab_refcnt);
850
851 return (WALK_NEXT);
852 }
853
854 /*
855 * Returns an upper bound on the number of allocated buffers in a given
856 * cache.
857 */
858 size_t
859 umem_estimate_allocated(uintptr_t addr, const umem_cache_t *cp)
860 {
861 int magsize;
862 size_t cache_est;
863
864 cache_est = cp->cache_buftotal;
865
866 (void) mdb_pwalk("umem_slab_partial",
867 (mdb_walk_cb_t)umem_estimate_slab, &cache_est, addr);
868
869 if ((magsize = umem_get_magsize(cp)) != 0) {
870 size_t mag_est = cp->cache_full.ml_total * magsize;
871
872 if (cache_est >= mag_est) {
873 cache_est -= mag_est;
874 } else {
875 mdb_warn("cache %p's magazine layer holds more buffers "
876 "than the slab layer.\n", addr);
877 }
878 }
879 return (cache_est);
880 }
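
/*
 * Illustrative (hypothetical) numbers: a cache with cache_buftotal == 100,
 * 10 free chunks on its partial slabs, a magazine size of 5, and 4 full
 * magazines in the depot would be estimated at 100 - 10 - (4 * 5) = 70
 * allocated buffers.  This is an upper bound, since loaded per-CPU
 * magazines are not subtracted here.
 */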
881
882 #define READMAG_ROUNDS(rounds) { \
883 if (mdb_vread(mp, magbsize, (uintptr_t)ump) == -1) { \
884 mdb_warn("couldn't read magazine at %p", ump); \
885 goto fail; \
886 } \
887 for (i = 0; i < rounds; i++) { \
888 maglist[magcnt++] = mp->mag_round[i]; \
889 if (magcnt == magmax) { \
890 mdb_warn("%d magazines exceeds fudge factor\n", \
891 magcnt); \
892 goto fail; \
893 } \
894 } \
895 }
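
/*
 * READMAG_ROUNDS() relies on locals of umem_read_magazines() below: 'ump'
 * (target address of the magazine to read), 'mp' (a local buffer of
 * magbsize bytes), 'maglist', 'magcnt', 'magmax', and the loop index 'i'.
 * It appends each round pointer in the magazine to maglist, jumping to
 * 'fail' on a read error or if the fudge-factor limit is exceeded.
 */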
896
897 static int
898 umem_read_magazines(umem_cache_t *cp, uintptr_t addr,
899 void ***maglistp, size_t *magcntp, size_t *magmaxp)
900 {
901 umem_magazine_t *ump, *mp;
902 void **maglist = NULL;
903 int i, cpu;
904 size_t magsize, magmax, magbsize;
905 size_t magcnt = 0;
906
907 /*
908 * Read the magtype out of the cache, after verifying the pointer's
909 * correctness.
910 */
911 magsize = umem_get_magsize(cp);
912 if (magsize == 0) {
913 *maglistp = NULL;
914 *magcntp = 0;
915 *magmaxp = 0;
916 return (0);
917 }
918
919 /*
920 * There are several places where we need to go buffer hunting:
921 * the per-CPU loaded magazine, the per-CPU spare full magazine,
922 * and the full magazine list in the depot.
923 *
924 * For an upper bound on the number of buffers in the magazine
925 * layer, we have the number of magazines on the cache_full
926 * list plus at most two magazines per CPU (the loaded and the
927 * spare). Toss in 100 magazines as a fudge factor in case this
928 * is live (the number "100" comes from the same fudge factor in
929 * crash(1M)).
930 */
931 magmax = (cp->cache_full.ml_total + 2 * umem_max_ncpus + 100) * magsize;
932 magbsize = offsetof(umem_magazine_t, mag_round[magsize]);
933
934 if (magbsize >= PAGESIZE / 2) {
935 mdb_warn("magazine size for cache %p unreasonable (%x)\n",
936 addr, magbsize);
937 return (-1);
938 }
939
940 maglist = mdb_alloc(magmax * sizeof (void *), UM_SLEEP);
941 mp = mdb_alloc(magbsize, UM_SLEEP);
942 if (mp == NULL || maglist == NULL)
943 goto fail;
944
945 /*
946 * First up: the magazines in the depot (i.e. on the cache_full list).
947 */
948 for (ump = cp->cache_full.ml_list; ump != NULL; ) {
949 READMAG_ROUNDS(magsize);
950 ump = mp->mag_next;
951
952 if (ump == cp->cache_full.ml_list)
953 break; /* cache_full list loop detected */
954 }
955
956 dprintf(("cache_full list done\n"));
957
958 /*
959 * Now whip through the CPUs, snagging the loaded magazines
960 * and full spares.
961 */
962 for (cpu = 0; cpu < umem_max_ncpus; cpu++) {
963 umem_cpu_cache_t *ccp = &cp->cache_cpu[cpu];
964
965 dprintf(("reading cpu cache %p\n",
966 (uintptr_t)ccp - (uintptr_t)cp + addr));
967
968 if (ccp->cc_rounds > 0 &&
969 (ump = ccp->cc_loaded) != NULL) {
970 dprintf(("reading %d loaded rounds\n", ccp->cc_rounds));
971 READMAG_ROUNDS(ccp->cc_rounds);
972 }
973
974 if (ccp->cc_prounds > 0 &&
975 (ump = ccp->cc_ploaded) != NULL) {
976 dprintf(("reading %d previously loaded rounds\n",
977 ccp->cc_prounds));
978 READMAG_ROUNDS(ccp->cc_prounds);
979 }
980 }
981
982 dprintf(("magazine layer: %d buffers\n", magcnt));
983
984 mdb_free(mp, magbsize);
985
986 *maglistp = maglist;
987 *magcntp = magcnt;
988 *magmaxp = magmax;
989
990 return (0);
991
992 fail:
993 if (mp)
994 mdb_free(mp, magbsize);
995 if (maglist)
996 mdb_free(maglist, magmax * sizeof (void *));
997
998 return (-1);
999 }
1000
1001 typedef struct umem_read_ptc_walk {
1002 void **urpw_buf;
1003 size_t urpw_cnt;
1004 size_t urpw_max;
1005 } umem_read_ptc_walk_t;
1006
1007 /*ARGSUSED*/
1008 static int
1009 umem_read_ptc_walk_buf(uintptr_t addr,
1010 const void *ignored, umem_read_ptc_walk_t *urpw)
1011 {
1012 if (urpw->urpw_cnt == urpw->urpw_max) {
1013 size_t nmax = urpw->urpw_max ? (urpw->urpw_max << 1) : 1;
1014 void **new = mdb_zalloc(nmax * sizeof (void *), UM_SLEEP);
1015
1016 if (nmax > 1) {
1017 size_t osize = urpw->urpw_max * sizeof (void *);
1018 bcopy(urpw->urpw_buf, new, osize);
1019 mdb_free(urpw->urpw_buf, osize);
1020 }
1021
1022 urpw->urpw_buf = new;
1023 urpw->urpw_max = nmax;
1024 }
1025
1026 urpw->urpw_buf[urpw->urpw_cnt++] = (void *)addr;
1027
1028 return (WALK_NEXT);
1029 }
1030
1031 static int
1032 umem_read_ptc(umem_cache_t *cp,
1033 void ***buflistp, size_t *bufcntp, size_t *bufmaxp)
1034 {
1035 umem_read_ptc_walk_t urpw;
1036 char walk[60];
1037 int rval;
1038
1039 if (!(cp->cache_flags & UMF_PTC))
1040 return (0);
1041
1042 (void) mdb_snprintf(walk, sizeof (walk), "umem_ptc_%d",
1043 cp->cache_bufsize);
1044
1045 urpw.urpw_buf = *buflistp;
1046 urpw.urpw_cnt = *bufcntp;
1047 urpw.urpw_max = *bufmaxp;
1048
1049 if ((rval = mdb_walk(walk,
1050 (mdb_walk_cb_t)umem_read_ptc_walk_buf, &urpw)) == -1) {
1051 mdb_warn("couldn't walk %s", walk);
1052 }
1053
1054 *buflistp = urpw.urpw_buf;
1055 *bufcntp = urpw.urpw_cnt;
1056 *bufmaxp = urpw.urpw_max;
1057
1058 return (rval);
1059 }
1060
1061 static int
1062 umem_walk_callback(mdb_walk_state_t *wsp, uintptr_t buf)
1063 {
1064 return (wsp->walk_callback(buf, NULL, wsp->walk_cbdata));
1065 }
1066
1067 static int
1068 bufctl_walk_callback(umem_cache_t *cp, mdb_walk_state_t *wsp, uintptr_t buf)
1069 {
1070 umem_bufctl_audit_t *b;
1071 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1072
1073 /*
1074 * if UMF_AUDIT is not set, we know that we're looking at a
1075 * umem_bufctl_t.
1076 */
1077 if (!(cp->cache_flags & UMF_AUDIT) ||
1078 mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, buf) == -1) {
1079 (void) memset(b, 0, UMEM_BUFCTL_AUDIT_SIZE);
1080 if (mdb_vread(b, sizeof (umem_bufctl_t), buf) == -1) {
1081 mdb_warn("unable to read bufctl at %p", buf);
1082 return (WALK_ERR);
1083 }
1084 }
1085
1086 return (wsp->walk_callback(buf, b, wsp->walk_cbdata));
1087 }
1088
1089 typedef struct umem_walk {
1090 int umw_type;
1091
1092 uintptr_t umw_addr; /* cache address */
1093 umem_cache_t *umw_cp;
1094 size_t umw_csize;
1095
1096 /*
1097 * magazine layer
1098 */
1099 void **umw_maglist;
1100 size_t umw_max;
1101 size_t umw_count;
1102 size_t umw_pos;
1103
1104 /*
1105 * slab layer
1106 */
1107 char *umw_valid; /* to keep track of freed buffers */
1108 char *umw_ubase; /* buffer for slab data */
1109 } umem_walk_t;
1110
1111 static int
1112 umem_walk_init_common(mdb_walk_state_t *wsp, int type)
1113 {
1114 umem_walk_t *umw;
1115 int csize;
1116 umem_cache_t *cp;
1117 size_t vm_quantum;
1118
1119 size_t magmax, magcnt;
1120 void **maglist = NULL;
1121 uint_t chunksize, slabsize;
1122 int status = WALK_ERR;
1123 uintptr_t addr = wsp->walk_addr;
1124 const char *layered;
1125
1126 type &= ~UM_HASH;
1127
1128 if (addr == NULL) {
1129 mdb_warn("umem walk doesn't support global walks\n");
1130 return (WALK_ERR);
1131 }
1132
1133 dprintf(("walking %p\n", addr));
1134
1135 /*
1136 * The number of "cpus" determines how large the cache is.
1137 */
1138 csize = UMEM_CACHE_SIZE(umem_max_ncpus);
1139 cp = mdb_alloc(csize, UM_SLEEP);
1140
1141 if (mdb_vread(cp, csize, addr) == -1) {
1142 mdb_warn("couldn't read cache at addr %p", addr);
1143 goto out2;
1144 }
1145
1146 /*
1147 * It's easy for someone to hand us an invalid cache address.
1148 * Unfortunately, it is hard for this walker to survive an
1149 * invalid cache cleanly. So we make sure that:
1150 *
1151 * 1. the vmem arena for the cache is readable,
1152 * 2. the vmem arena's quantum is a power of 2,
1153 * 3. our slabsize is a multiple of the quantum, and
1154 * 4. our chunksize is >0 and less than our slabsize.
1155 */
1156 if (mdb_vread(&vm_quantum, sizeof (vm_quantum),
1157 (uintptr_t)&cp->cache_arena->vm_quantum) == -1 ||
1158 vm_quantum == 0 ||
1159 (vm_quantum & (vm_quantum - 1)) != 0 ||
1160 cp->cache_slabsize < vm_quantum ||
1161 P2PHASE(cp->cache_slabsize, vm_quantum) != 0 ||
1162 cp->cache_chunksize == 0 ||
1163 cp->cache_chunksize > cp->cache_slabsize) {
1164 mdb_warn("%p is not a valid umem_cache_t\n", addr);
1165 goto out2;
1166 }
1167
1168 dprintf(("buf total is %d\n", cp->cache_buftotal));
1169
1170 if (cp->cache_buftotal == 0) {
1171 mdb_free(cp, csize);
1172 return (WALK_DONE);
1173 }
1174
1175 /*
1176 * If they ask for bufctls, but it's a small-slab cache,
1177 * there is nothing to report.
1178 */
1179 if ((type & UM_BUFCTL) && !(cp->cache_flags & UMF_HASH)) {
1180 dprintf(("bufctl requested, not UMF_HASH (flags: %p)\n",
1181 cp->cache_flags));
1182 mdb_free(cp, csize);
1183 return (WALK_DONE);
1184 }
1185
1186 /*
1187 * Read in the contents of the magazine layer
1188 */
1189 if (umem_read_magazines(cp, addr, &maglist, &magcnt, &magmax) != 0)
1190 goto out2;
1191
1192 /*
1193 * Read in the contents of the per-thread caches, if any
1194 */
1195 if (umem_read_ptc(cp, &maglist, &magcnt, &magmax) != 0)
1196 goto out2;
1197
1198 /*
1199 * We have all of the buffers from the magazines and from the
1200 * per-thread cache (if any); if we are walking allocated buffers,
1201 * sort them so we can bsearch them later.
1202 */
1203 if (type & UM_ALLOCATED)
1204 qsort(maglist, magcnt, sizeof (void *), addrcmp);
1205
1206 wsp->walk_data = umw = mdb_zalloc(sizeof (umem_walk_t), UM_SLEEP);
1207
1208 umw->umw_type = type;
1209 umw->umw_addr = addr;
1210 umw->umw_cp = cp;
1211 umw->umw_csize = csize;
1212 umw->umw_maglist = maglist;
1213 umw->umw_max = magmax;
1214 umw->umw_count = magcnt;
1215 umw->umw_pos = 0;
1216
1217 /*
1218 * When walking allocated buffers in a UMF_HASH cache, we walk the
1219 * hash table instead of the slab layer.
1220 */
1221 if ((cp->cache_flags & UMF_HASH) && (type & UM_ALLOCATED)) {
1222 layered = "umem_hash";
1223
1224 umw->umw_type |= UM_HASH;
1225 } else {
1226 /*
1227 * If we are walking freed buffers, we only need the
1228 * magazine layer plus the partially allocated slabs.
1229 * To walk allocated buffers, we need all of the slabs.
1230 */
1231 if (type & UM_ALLOCATED)
1232 layered = "umem_slab";
1233 else
1234 layered = "umem_slab_partial";
1235
1236 /*
1237 * for small-slab caches, we read in the entire slab. For
1238 * freed buffers, we can just walk the freelist. For
1239 * allocated buffers, we use a 'valid' array to track
1240 * the freed buffers.
1241 */
1242 if (!(cp->cache_flags & UMF_HASH)) {
1243 chunksize = cp->cache_chunksize;
1244 slabsize = cp->cache_slabsize;
1245
1246 umw->umw_ubase = mdb_alloc(slabsize +
1247 sizeof (umem_bufctl_t), UM_SLEEP);
1248
1249 if (type & UM_ALLOCATED)
1250 umw->umw_valid =
1251 mdb_alloc(slabsize / chunksize, UM_SLEEP);
1252 }
1253 }
1254
1255 status = WALK_NEXT;
1256
1257 if (mdb_layered_walk(layered, wsp) == -1) {
1258 mdb_warn("unable to start layered '%s' walk", layered);
1259 status = WALK_ERR;
1260 }
1261
1262 out1:
1263 if (status == WALK_ERR) {
1264 if (umw->umw_valid)
1265 mdb_free(umw->umw_valid, slabsize / chunksize);
1266
1267 if (umw->umw_ubase)
1268 mdb_free(umw->umw_ubase, slabsize +
1269 sizeof (umem_bufctl_t));
1270
1271 if (umw->umw_maglist)
1272 mdb_free(umw->umw_maglist, umw->umw_max *
1273 sizeof (uintptr_t));
1274
1275 mdb_free(umw, sizeof (umem_walk_t));
1276 wsp->walk_data = NULL;
1277 }
1278
1279 out2:
1280 if (status == WALK_ERR)
1281 mdb_free(cp, csize);
1282
1283 return (status);
1284 }
1285
1286 int
1287 umem_walk_step(mdb_walk_state_t *wsp)
1288 {
1289 umem_walk_t *umw = wsp->walk_data;
1290 int type = umw->umw_type;
1291 umem_cache_t *cp = umw->umw_cp;
1292
1293 void **maglist = umw->umw_maglist;
1294 int magcnt = umw->umw_count;
1295
1296 uintptr_t chunksize, slabsize;
1297 uintptr_t addr;
1298 const umem_slab_t *sp;
1299 const umem_bufctl_t *bcp;
1300 umem_bufctl_t bc;
1301
1302 int chunks;
1303 char *kbase;
1304 void *buf;
1305 int i, ret;
1306
1307 char *valid, *ubase;
1308
1309 /*
1310 * first, handle the 'umem_hash' layered walk case
1311 */
1312 if (type & UM_HASH) {
1313 /*
1314 * We have a buffer which has been allocated out of the
1315 * global layer. We need to make sure that it's not
1316 * actually sitting in a magazine before we report it as
1317 * an allocated buffer.
1318 */
1319 buf = ((const umem_bufctl_t *)wsp->walk_layer)->bc_addr;
1320
1321 if (magcnt > 0 &&
1322 bsearch(&buf, maglist, magcnt, sizeof (void *),
1323 addrcmp) != NULL)
1324 return (WALK_NEXT);
1325
1326 if (type & UM_BUFCTL)
1327 return (bufctl_walk_callback(cp, wsp, wsp->walk_addr));
1328
1329 return (umem_walk_callback(wsp, (uintptr_t)buf));
1330 }
1331
1332 ret = WALK_NEXT;
1333
1334 addr = umw->umw_addr;
1335
1336 /*
1337 * If we're walking freed buffers, report everything in the
1338 * magazine layer before processing the first slab.
1339 */
1340 if ((type & UM_FREE) && magcnt != 0) {
1341 umw->umw_count = 0; /* only do this once */
1342 for (i = 0; i < magcnt; i++) {
1343 buf = maglist[i];
1344
1345 if (type & UM_BUFCTL) {
1346 uintptr_t out;
1347
1348 if (cp->cache_flags & UMF_BUFTAG) {
1349 umem_buftag_t *btp;
1350 umem_buftag_t tag;
1351
1352 /* LINTED - alignment */
1353 btp = UMEM_BUFTAG(cp, buf);
1354 if (mdb_vread(&tag, sizeof (tag),
1355 (uintptr_t)btp) == -1) {
1356 mdb_warn("reading buftag for "
1357 "%p at %p", buf, btp);
1358 continue;
1359 }
1360 out = (uintptr_t)tag.bt_bufctl;
1361 } else {
1362 if (umem_hash_lookup(cp, addr, buf,
1363 &out) == -1)
1364 continue;
1365 }
1366 ret = bufctl_walk_callback(cp, wsp, out);
1367 } else {
1368 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1369 }
1370
1371 if (ret != WALK_NEXT)
1372 return (ret);
1373 }
1374 }
1375
1376 /*
1377 * Handle the buffers in the current slab
1378 */
1379 chunksize = cp->cache_chunksize;
1380 slabsize = cp->cache_slabsize;
1381
1382 sp = wsp->walk_layer;
1383 chunks = sp->slab_chunks;
1384 kbase = sp->slab_base;
1385
1386 dprintf(("kbase is %p\n", kbase));
1387
1388 if (!(cp->cache_flags & UMF_HASH)) {
1389 valid = umw->umw_valid;
1390 ubase = umw->umw_ubase;
1391
1392 if (mdb_vread(ubase, chunks * chunksize,
1393 (uintptr_t)kbase) == -1) {
1394 mdb_warn("failed to read slab contents at %p", kbase);
1395 return (WALK_ERR);
1396 }
1397
1398 /*
1399 * Set up the valid map as fully allocated -- we'll punch
1400 * out the freelist.
1401 */
1402 if (type & UM_ALLOCATED)
1403 (void) memset(valid, 1, chunks);
1404 } else {
1405 valid = NULL;
1406 ubase = NULL;
1407 }
1408
1409 /*
1410 * walk the slab's freelist
1411 */
1412 bcp = sp->slab_head;
1413
1414 dprintf(("refcnt is %d; chunks is %d\n", sp->slab_refcnt, chunks));
1415
1416 /*
1417 * since we could be in the middle of allocating a buffer,
1418 * our refcnt could be one higher than it ought to be. So we
1419 * check one further on the freelist than the count allows.
1420 */
1421 for (i = sp->slab_refcnt; i <= chunks; i++) {
1422 uint_t ndx;
1423
1424 dprintf(("bcp is %p\n", bcp));
1425
1426 if (bcp == NULL) {
1427 if (i == chunks)
1428 break;
1429 mdb_warn(
1430 "slab %p in cache %p freelist too short by %d\n",
1431 sp, addr, chunks - i);
1432 break;
1433 }
1434
1435 if (cp->cache_flags & UMF_HASH) {
1436 if (mdb_vread(&bc, sizeof (bc), (uintptr_t)bcp) == -1) {
1437 mdb_warn("failed to read bufctl ptr at %p",
1438 bcp);
1439 break;
1440 }
1441 buf = bc.bc_addr;
1442 } else {
1443 /*
1444 * Otherwise the buffer is (or should be) in the slab
1445 * that we've read in; determine its offset in the
1446 * slab, validate that it's not corrupt, and add to
1447 * our base address to find the umem_bufctl_t. (Note
1448 * that we don't need to add the size of the bufctl
1449 * to our offset calculation because of the slop that's
1450 * allocated for the buffer at ubase.)
1451 */
1452 uintptr_t offs = (uintptr_t)bcp - (uintptr_t)kbase;
1453
1454 if (offs > chunks * chunksize) {
1455 mdb_warn("found corrupt bufctl ptr %p"
1456 " in slab %p in cache %p\n", bcp,
1457 wsp->walk_addr, addr);
1458 break;
1459 }
1460
1461 bc = *((umem_bufctl_t *)((uintptr_t)ubase + offs));
1462 buf = UMEM_BUF(cp, bcp);
1463 }
1464
1465 ndx = ((uintptr_t)buf - (uintptr_t)kbase) / chunksize;
1466
1467 if (ndx > slabsize / cp->cache_bufsize) {
1468 /*
1469 * This is very wrong; we have managed to find
1470 * a buffer in the slab which shouldn't
1471 * actually be here. Emit a warning, and
1472 * try to continue.
1473 */
1474 mdb_warn("buf %p is out of range for "
1475 "slab %p, cache %p\n", buf, sp, addr);
1476 } else if (type & UM_ALLOCATED) {
1477 /*
1478 * we have found a buffer on the slab's freelist;
1479 * clear its entry
1480 */
1481 valid[ndx] = 0;
1482 } else {
1483 /*
1484 * Report this freed buffer
1485 */
1486 if (type & UM_BUFCTL) {
1487 ret = bufctl_walk_callback(cp, wsp,
1488 (uintptr_t)bcp);
1489 } else {
1490 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1491 }
1492 if (ret != WALK_NEXT)
1493 return (ret);
1494 }
1495
1496 bcp = bc.bc_next;
1497 }
1498
1499 if (bcp != NULL) {
1500 dprintf(("slab %p in cache %p freelist too long (%p)\n",
1501 sp, addr, bcp));
1502 }
1503
1504 /*
1505 * If we are walking freed buffers, the loop above handled reporting
1506 * them.
1507 */
1508 if (type & UM_FREE)
1509 return (WALK_NEXT);
1510
1511 if (type & UM_BUFCTL) {
1512 mdb_warn("impossible situation: small-slab UM_BUFCTL walk for "
1513 "cache %p\n", addr);
1514 return (WALK_ERR);
1515 }
1516
1517 /*
1518 * Report allocated buffers, skipping buffers in the magazine layer.
1519 * We only get this far for small-slab caches.
1520 */
1521 for (i = 0; ret == WALK_NEXT && i < chunks; i++) {
1522 buf = (char *)kbase + i * chunksize;
1523
1524 if (!valid[i])
1525 continue; /* on slab freelist */
1526
1527 if (magcnt > 0 &&
1528 bsearch(&buf, maglist, magcnt, sizeof (void *),
1529 addrcmp) != NULL)
1530 continue; /* in magazine layer */
1531
1532 ret = umem_walk_callback(wsp, (uintptr_t)buf);
1533 }
1534 return (ret);
1535 }
1536
1537 void
1538 umem_walk_fini(mdb_walk_state_t *wsp)
1539 {
1540 umem_walk_t *umw = wsp->walk_data;
1541 uintptr_t chunksize;
1542 uintptr_t slabsize;
1543
1544 if (umw == NULL)
1545 return;
1546
1547 if (umw->umw_maglist != NULL)
1548 mdb_free(umw->umw_maglist, umw->umw_max * sizeof (void *));
1549
1550 chunksize = umw->umw_cp->cache_chunksize;
1551 slabsize = umw->umw_cp->cache_slabsize;
1552
1553 if (umw->umw_valid != NULL)
1554 mdb_free(umw->umw_valid, slabsize / chunksize);
1555 if (umw->umw_ubase != NULL)
1556 mdb_free(umw->umw_ubase, slabsize + sizeof (umem_bufctl_t));
1557
1558 mdb_free(umw->umw_cp, umw->umw_csize);
1559 mdb_free(umw, sizeof (umem_walk_t));
1560 }
1561
1562 /*ARGSUSED*/
1563 static int
1564 umem_walk_all(uintptr_t addr, const umem_cache_t *c, mdb_walk_state_t *wsp)
1565 {
1566 /*
1567 * Buffers allocated from NOTOUCH caches can also show up as freed
1568 * memory in other caches. This can be a little confusing, so we
1569 * don't walk NOTOUCH caches when walking all caches (thereby assuring
1570 * that "::walk umem" and "::walk freemem" yield disjoint output).
1571 */
1572 if (c->cache_cflags & UMC_NOTOUCH)
1573 return (WALK_NEXT);
1574
1575 if (mdb_pwalk(wsp->walk_data, wsp->walk_callback,
1576 wsp->walk_cbdata, addr) == -1)
1577 return (WALK_DONE);
1578
1579 return (WALK_NEXT);
1580 }
1581
1582 #define UMEM_WALK_ALL(name, wsp) { \
1583 wsp->walk_data = (name); \
1584 if (mdb_walk("umem_cache", (mdb_walk_cb_t)umem_walk_all, wsp) == -1) \
1585 return (WALK_ERR); \
1586 return (WALK_DONE); \
1587 }
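
/*
 * UMEM_WALK_ALL() turns a global walk request into a per-cache walk: it
 * records the walker's own name in walk_data and, via umem_walk_all(),
 * re-invokes that walker once for each cache found by the umem_cache walk.
 */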
1588
1589 int
1590 umem_walk_init(mdb_walk_state_t *wsp)
1591 {
1592 if (wsp->walk_arg != NULL)
1593 wsp->walk_addr = (uintptr_t)wsp->walk_arg;
1594
1595 if (wsp->walk_addr == NULL)
1596 UMEM_WALK_ALL("umem", wsp);
1597 return (umem_walk_init_common(wsp, UM_ALLOCATED));
1598 }
1599
1600 int
1601 bufctl_walk_init(mdb_walk_state_t *wsp)
1602 {
1603 if (wsp->walk_addr == NULL)
1604 UMEM_WALK_ALL("bufctl", wsp);
1605 return (umem_walk_init_common(wsp, UM_ALLOCATED | UM_BUFCTL));
1606 }
1607
1608 int
1609 freemem_walk_init(mdb_walk_state_t *wsp)
1610 {
1611 if (wsp->walk_addr == NULL)
1612 UMEM_WALK_ALL("freemem", wsp);
1613 return (umem_walk_init_common(wsp, UM_FREE));
1614 }
1615
1616 int
1617 freectl_walk_init(mdb_walk_state_t *wsp)
1618 {
1619 if (wsp->walk_addr == NULL)
1620 UMEM_WALK_ALL("freectl", wsp);
1621 return (umem_walk_init_common(wsp, UM_FREE | UM_BUFCTL));
1622 }
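
/*
 * The four walk variants above ("umem", "bufctl", "freemem", "freectl")
 * can be run globally or against a single cache address, for example
 * (the cache address is illustrative):
 *
 *	> ::walk umem
 *	> 0x8069d30::walk freemem
 */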
1623
1624 typedef struct bufctl_history_walk {
1625 void *bhw_next;
1626 umem_cache_t *bhw_cache;
1627 umem_slab_t *bhw_slab;
1628 hrtime_t bhw_timestamp;
1629 } bufctl_history_walk_t;
1630
1631 int
1632 bufctl_history_walk_init(mdb_walk_state_t *wsp)
1633 {
1634 bufctl_history_walk_t *bhw;
1635 umem_bufctl_audit_t bc;
1636 umem_bufctl_audit_t bcn;
1637
1638 if (wsp->walk_addr == NULL) {
1639 mdb_warn("bufctl_history walk doesn't support global walks\n");
1640 return (WALK_ERR);
1641 }
1642
1643 if (mdb_vread(&bc, sizeof (bc), wsp->walk_addr) == -1) {
1644 mdb_warn("unable to read bufctl at %p", wsp->walk_addr);
1645 return (WALK_ERR);
1646 }
1647
1648 bhw = mdb_zalloc(sizeof (*bhw), UM_SLEEP);
1649 bhw->bhw_timestamp = 0;
1650 bhw->bhw_cache = bc.bc_cache;
1651 bhw->bhw_slab = bc.bc_slab;
1652
1653 /*
1654 * sometimes the first log entry matches the base bufctl; in that
1655 * case, skip the base bufctl.
1656 */
1657 if (bc.bc_lastlog != NULL &&
1658 mdb_vread(&bcn, sizeof (bcn), (uintptr_t)bc.bc_lastlog) != -1 &&
1659 bc.bc_addr == bcn.bc_addr &&
1660 bc.bc_cache == bcn.bc_cache &&
1661 bc.bc_slab == bcn.bc_slab &&
1662 bc.bc_timestamp == bcn.bc_timestamp &&
1663 bc.bc_thread == bcn.bc_thread)
1664 bhw->bhw_next = bc.bc_lastlog;
1665 else
1666 bhw->bhw_next = (void *)wsp->walk_addr;
1667
1668 wsp->walk_addr = (uintptr_t)bc.bc_addr;
1669 wsp->walk_data = bhw;
1670
1671 return (WALK_NEXT);
1672 }
1673
1674 int
1675 bufctl_history_walk_step(mdb_walk_state_t *wsp)
1676 {
1677 bufctl_history_walk_t *bhw = wsp->walk_data;
1678 uintptr_t addr = (uintptr_t)bhw->bhw_next;
1679 uintptr_t baseaddr = wsp->walk_addr;
1680 umem_bufctl_audit_t *b;
1681 UMEM_LOCAL_BUFCTL_AUDIT(&b);
1682
1683 if (addr == NULL)
1684 return (WALK_DONE);
1685
1686 if (mdb_vread(b, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1687 mdb_warn("unable to read bufctl at %p", bhw->bhw_next);
1688 return (WALK_ERR);
1689 }
1690
1691 /*
1692 * The bufctl is only valid if the address, cache, and slab are
1693 * correct. We also check that the timestamp is decreasing, to
1694 * prevent infinite loops.
1695 */
1696 if ((uintptr_t)b->bc_addr != baseaddr ||
1697 b->bc_cache != bhw->bhw_cache ||
1698 b->bc_slab != bhw->bhw_slab ||
1699 (bhw->bhw_timestamp != 0 && b->bc_timestamp >= bhw->bhw_timestamp))
1700 return (WALK_DONE);
1701
1702 bhw->bhw_next = b->bc_lastlog;
1703 bhw->bhw_timestamp = b->bc_timestamp;
1704
1705 return (wsp->walk_callback(addr, b, wsp->walk_cbdata));
1706 }
1707
1708 void
1709 bufctl_history_walk_fini(mdb_walk_state_t *wsp)
1710 {
1711 bufctl_history_walk_t *bhw = wsp->walk_data;
1712
1713 mdb_free(bhw, sizeof (*bhw));
1714 }
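
/*
 * The bufctl_history walk starts from a bufctl address and follows its
 * bc_lastlog chain backwards through the transaction log, stopping when
 * the entries no longer match the original buffer or the timestamps stop
 * decreasing.  For example (the bufctl address is illustrative):
 *
 *	> 0x8069d30::walk bufctl_history
 */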
1715
1716 typedef struct umem_log_walk {
1717 umem_bufctl_audit_t *ulw_base;
1718 umem_bufctl_audit_t **ulw_sorted;
1719 umem_log_header_t ulw_lh;
1720 size_t ulw_size;
1721 size_t ulw_maxndx;
1722 size_t ulw_ndx;
1723 } umem_log_walk_t;
1724
1725 int
1726 umem_log_walk_init(mdb_walk_state_t *wsp)
1727 {
1728 uintptr_t lp = wsp->walk_addr;
1729 umem_log_walk_t *ulw;
1730 umem_log_header_t *lhp;
1731 int maxndx, i, j, k;
1732
1733 /*
1734 * By default (global walk), walk the umem_transaction_log. Otherwise
1735 * read the log whose umem_log_header_t is stored at walk_addr.
1736 */
1737 if (lp == NULL && umem_readvar(&lp, "umem_transaction_log") == -1) {
1738 mdb_warn("failed to read 'umem_transaction_log'");
1739 return (WALK_ERR);
1740 }
1741
1742 if (lp == NULL) {
1743 mdb_warn("log is disabled\n");
1744 return (WALK_ERR);
1745 }
1746
1747 ulw = mdb_zalloc(sizeof (umem_log_walk_t), UM_SLEEP);
1748 lhp = &ulw->ulw_lh;
1749
1750 if (mdb_vread(lhp, sizeof (umem_log_header_t), lp) == -1) {
1751 mdb_warn("failed to read log header at %p", lp);
1752 mdb_free(ulw, sizeof (umem_log_walk_t));
1753 return (WALK_ERR);
1754 }
1755
1756 ulw->ulw_size = lhp->lh_chunksize * lhp->lh_nchunks;
1757 ulw->ulw_base = mdb_alloc(ulw->ulw_size, UM_SLEEP);
1758 maxndx = lhp->lh_chunksize / UMEM_BUFCTL_AUDIT_SIZE - 1;
1759
1760 if (mdb_vread(ulw->ulw_base, ulw->ulw_size,
1761 (uintptr_t)lhp->lh_base) == -1) {
1762 mdb_warn("failed to read log at base %p", lhp->lh_base);
1763 mdb_free(ulw->ulw_base, ulw->ulw_size);
1764 mdb_free(ulw, sizeof (umem_log_walk_t));
1765 return (WALK_ERR);
1766 }
1767
1768 ulw->ulw_sorted = mdb_alloc(maxndx * lhp->lh_nchunks *
1769 sizeof (umem_bufctl_audit_t *), UM_SLEEP);
1770
1771 for (i = 0, k = 0; i < lhp->lh_nchunks; i++) {
1772 caddr_t chunk = (caddr_t)
1773 ((uintptr_t)ulw->ulw_base + i * lhp->lh_chunksize);
1774
1775 for (j = 0; j < maxndx; j++) {
1776 /* LINTED align */
1777 ulw->ulw_sorted[k++] = (umem_bufctl_audit_t *)chunk;
1778 chunk += UMEM_BUFCTL_AUDIT_SIZE;
1779 }
1780 }
1781
1782 qsort(ulw->ulw_sorted, k, sizeof (umem_bufctl_audit_t *),
1783 (int(*)(const void *, const void *))bufctlcmp);
1784
1785 ulw->ulw_maxndx = k;
1786 wsp->walk_data = ulw;
1787
1788 return (WALK_NEXT);
1789 }
1790
1791 int
1792 umem_log_walk_step(mdb_walk_state_t *wsp)
1793 {
1794 umem_log_walk_t *ulw = wsp->walk_data;
1795 umem_bufctl_audit_t *bcp;
1796
1797 if (ulw->ulw_ndx == ulw->ulw_maxndx)
1798 return (WALK_DONE);
1799
1800 bcp = ulw->ulw_sorted[ulw->ulw_ndx++];
1801
1802 return (wsp->walk_callback((uintptr_t)bcp - (uintptr_t)ulw->ulw_base +
1803 (uintptr_t)ulw->ulw_lh.lh_base, bcp, wsp->walk_cbdata));
1804 }
1805
1806 void
1807 umem_log_walk_fini(mdb_walk_state_t *wsp)
1808 {
1809 umem_log_walk_t *ulw = wsp->walk_data;
1810
1811 mdb_free(ulw->ulw_base, ulw->ulw_size);
1812 mdb_free(ulw->ulw_sorted, ulw->ulw_maxndx *
1813 sizeof (umem_bufctl_audit_t *));
1814 mdb_free(ulw, sizeof (umem_log_walk_t));
1815 }
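
/*
 * This log walk yields transaction-log entries in reverse time order
 * (newest first, per bufctlcmp above).  By default it reads the
 * umem_transaction_log; passing the address of another umem_log_header_t
 * walks that log instead.
 */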
1816
1817 typedef struct allocdby_bufctl {
1818 uintptr_t abb_addr;
1819 hrtime_t abb_ts;
1820 } allocdby_bufctl_t;
1821
1822 typedef struct allocdby_walk {
1823 const char *abw_walk;
1824 uintptr_t abw_thread;
1825 size_t abw_nbufs;
1826 size_t abw_size;
1827 allocdby_bufctl_t *abw_buf;
1828 size_t abw_ndx;
1829 } allocdby_walk_t;
1830
1831 int
1832 allocdby_walk_bufctl(uintptr_t addr, const umem_bufctl_audit_t *bcp,
1833 allocdby_walk_t *abw)
1834 {
1835 if ((uintptr_t)bcp->bc_thread != abw->abw_thread)
1836 return (WALK_NEXT);
1837
1838 if (abw->abw_nbufs == abw->abw_size) {
1839 allocdby_bufctl_t *buf;
1840 size_t oldsize = sizeof (allocdby_bufctl_t) * abw->abw_size;
1841
1842 buf = mdb_zalloc(oldsize << 1, UM_SLEEP);
1843
1844 bcopy(abw->abw_buf, buf, oldsize);
1845 mdb_free(abw->abw_buf, oldsize);
1846
1847 abw->abw_size <<= 1;
1848 abw->abw_buf = buf;
1849 }
1850
1851 abw->abw_buf[abw->abw_nbufs].abb_addr = addr;
1852 abw->abw_buf[abw->abw_nbufs].abb_ts = bcp->bc_timestamp;
1853 abw->abw_nbufs++;
1854
1855 return (WALK_NEXT);
1856 }
1857
1858 /*ARGSUSED*/
1859 int
1860 allocdby_walk_cache(uintptr_t addr, const umem_cache_t *c, allocdby_walk_t *abw)
1861 {
1862 if (mdb_pwalk(abw->abw_walk, (mdb_walk_cb_t)allocdby_walk_bufctl,
1863 abw, addr) == -1) {
1864 mdb_warn("couldn't walk bufctl for cache %p", addr);
1865 return (WALK_DONE);
1866 }
1867
1868 return (WALK_NEXT);
1869 }
1870
1871 static int
1872 allocdby_cmp(const allocdby_bufctl_t *lhs, const allocdby_bufctl_t *rhs)
1873 {
1874 if (lhs->abb_ts < rhs->abb_ts)
1875 return (1);
1876 if (lhs->abb_ts > rhs->abb_ts)
1877 return (-1);
1878 return (0);
1879 }
1880
1881 static int
1882 allocdby_walk_init_common(mdb_walk_state_t *wsp, const char *walk)
1883 {
1884 allocdby_walk_t *abw;
1885
1886 if (wsp->walk_addr == NULL) {
1887 mdb_warn("allocdby walk doesn't support global walks\n");
1888 return (WALK_ERR);
1889 }
1890
1891 abw = mdb_zalloc(sizeof (allocdby_walk_t), UM_SLEEP);
1892
1893 abw->abw_thread = wsp->walk_addr;
1894 abw->abw_walk = walk;
1895 abw->abw_size = 128; /* something reasonable */
1896 abw->abw_buf =
1897 mdb_zalloc(abw->abw_size * sizeof (allocdby_bufctl_t), UM_SLEEP);
1898
1899 wsp->walk_data = abw;
1900
1901 if (mdb_walk("umem_cache",
1902 (mdb_walk_cb_t)allocdby_walk_cache, abw) == -1) {
1903 mdb_warn("couldn't walk umem_cache");
1904 allocdby_walk_fini(wsp);
1905 return (WALK_ERR);
1906 }
1907
1908 qsort(abw->abw_buf, abw->abw_nbufs, sizeof (allocdby_bufctl_t),
1909 (int(*)(const void *, const void *))allocdby_cmp);
1910
1911 return (WALK_NEXT);
1912 }
1913
1914 int
1915 allocdby_walk_init(mdb_walk_state_t *wsp)
1916 {
1917 return (allocdby_walk_init_common(wsp, "bufctl"));
1918 }
1919
1920 int
1921 freedby_walk_init(mdb_walk_state_t *wsp)
1922 {
1923 return (allocdby_walk_init_common(wsp, "freectl"));
1924 }
1925
1926 int
1927 allocdby_walk_step(mdb_walk_state_t *wsp)
1928 {
1929 allocdby_walk_t *abw = wsp->walk_data;
1930 uintptr_t addr;
1931 umem_bufctl_audit_t *bcp;
1932 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
1933
1934 if (abw->abw_ndx == abw->abw_nbufs)
1935 return (WALK_DONE);
1936
1937 addr = abw->abw_buf[abw->abw_ndx++].abb_addr;
1938
1939 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
1940 mdb_warn("couldn't read bufctl at %p", addr);
1941 return (WALK_DONE);
1942 }
1943
1944 return (wsp->walk_callback(addr, bcp, wsp->walk_cbdata));
1945 }
1946
1947 void
1948 allocdby_walk_fini(mdb_walk_state_t *wsp)
1949 {
1950 allocdby_walk_t *abw = wsp->walk_data;
1951
1952 mdb_free(abw->abw_buf, sizeof (allocdby_bufctl_t) * abw->abw_size);
1953 mdb_free(abw, sizeof (allocdby_walk_t));
1954 }
1955
1956 /*ARGSUSED*/
1957 int
1958 allocdby_walk(uintptr_t addr, const umem_bufctl_audit_t *bcp, void *ignored)
1959 {
1960 char c[MDB_SYM_NAMLEN];
1961 GElf_Sym sym;
1962 int i;
1963
1964 mdb_printf("%0?p %12llx ", addr, bcp->bc_timestamp);
1965 for (i = 0; i < bcp->bc_depth; i++) {
1966 if (mdb_lookup_by_addr(bcp->bc_stack[i],
1967 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
1968 continue;
1969 if (is_umem_sym(c, "umem_"))
1970 continue;
1971 mdb_printf("%s+0x%lx",
1972 c, bcp->bc_stack[i] - (uintptr_t)sym.st_value);
1973 break;
1974 }
1975 mdb_printf("\n");
1976
1977 return (WALK_NEXT);
1978 }
1979
1980 static int
1981 allocdby_common(uintptr_t addr, uint_t flags, const char *w)
1982 {
1983 if (!(flags & DCMD_ADDRSPEC))
1984 return (DCMD_USAGE);
1985
1986 mdb_printf("%-?s %12s %s\n", "BUFCTL", "TIMESTAMP", "CALLER");
1987
1988 if (mdb_pwalk(w, (mdb_walk_cb_t)allocdby_walk, NULL, addr) == -1) {
1989 mdb_warn("can't walk '%s' for %p", w, addr);
1990 return (DCMD_ERR);
1991 }
1992
1993 return (DCMD_OK);
1994 }
1995
1996 /*ARGSUSED*/
1997 int
1998 allocdby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
1999 {
2000 return (allocdby_common(addr, flags, "allocdby"));
2001 }
2002
2003 /*ARGSUSED*/
2004 int
2005 freedby(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2006 {
2007 return (allocdby_common(addr, flags, "freedby"));
2008 }
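
/*
 * Assuming these dcmds are registered under their function names, ::allocdby
 * and ::freedby take a thread pointer and list the bufctls whose transactions
 * were performed by that thread, newest first.  For example (the thread
 * address and output are illustrative):
 *
 *	> 0xd3d22240::allocdby
 *	BUFCTL           TIMESTAMP CALLER
 *	...
 */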
2009
2010 typedef struct whatis_info {
2011 mdb_whatis_t *wi_w;
2012 const umem_cache_t *wi_cache;
2013 const vmem_t *wi_vmem;
2014 vmem_t *wi_msb_arena;
2015 size_t wi_slab_size;
2016 int wi_slab_found;
2017 uint_t wi_freemem;
2018 } whatis_info_t;
2019
2020 /* call one of our dcmd functions with "-v" and the provided address */
2021 static void
2022 whatis_call_printer(mdb_dcmd_f *dcmd, uintptr_t addr)
2023 {
2024 mdb_arg_t a;
2025 a.a_type = MDB_TYPE_STRING;
2026 a.a_un.a_str = "-v";
2027
2028 mdb_printf(":\n");
2029 (void) (*dcmd)(addr, DCMD_ADDRSPEC, 1, &a);
2030 }
2031
2032 static void
2033 whatis_print_umem(whatis_info_t *wi, uintptr_t maddr, uintptr_t addr,
2034 uintptr_t baddr)
2035 {
2036 mdb_whatis_t *w = wi->wi_w;
2037 const umem_cache_t *cp = wi->wi_cache;
2038 int quiet = (mdb_whatis_flags(w) & WHATIS_QUIET);
2039
2040 int call_printer = (!quiet && (cp->cache_flags & UMF_AUDIT));
2041
2042 mdb_whatis_report_object(w, maddr, addr, "");
2043
2044 if (baddr != 0 && !call_printer)
2045 mdb_printf("bufctl %p ", baddr);
2046
2047 mdb_printf("%s from %s",
2048 (wi->wi_freemem == FALSE) ? "allocated" : "freed", cp->cache_name);
2049
2050 if (call_printer && baddr != 0) {
2051 whatis_call_printer(bufctl, baddr);
2052 return;
2053 }
2054 mdb_printf("\n");
2055 }
2056
2057 /*ARGSUSED*/
2058 static int
2059 whatis_walk_umem(uintptr_t addr, void *ignored, whatis_info_t *wi)
2060 {
2061 mdb_whatis_t *w = wi->wi_w;
2062
2063 uintptr_t cur;
2064 size_t size = wi->wi_cache->cache_bufsize;
2065
2066 while (mdb_whatis_match(w, addr, size, &cur))
2067 whatis_print_umem(wi, cur, addr, NULL);
2068
2069 return (WHATIS_WALKRET(w));
2070 }
2071
2072 /*ARGSUSED*/
2073 static int
2074 whatis_walk_bufctl(uintptr_t baddr, const umem_bufctl_t *bcp, whatis_info_t *wi)
2075 {
2076 mdb_whatis_t *w = wi->wi_w;
2077
2078 uintptr_t cur;
2079 uintptr_t addr = (uintptr_t)bcp->bc_addr;
2080 size_t size = wi->wi_cache->cache_bufsize;
2081
2082 while (mdb_whatis_match(w, addr, size, &cur))
2083 whatis_print_umem(wi, cur, addr, baddr);
2084
2085 return (WHATIS_WALKRET(w));
2086 }
2087
2088
2089 static int
2090 whatis_walk_seg(uintptr_t addr, const vmem_seg_t *vs, whatis_info_t *wi)
2091 {
2092 mdb_whatis_t *w = wi->wi_w;
2093
2094 size_t size = vs->vs_end - vs->vs_start;
2095 uintptr_t cur;
2096
2097 /* We're not interested in anything but alloc and free segments */
2098 if (vs->vs_type != VMEM_ALLOC && vs->vs_type != VMEM_FREE)
2099 return (WALK_NEXT);
2100
2101 while (mdb_whatis_match(w, vs->vs_start, size, &cur)) {
2102 mdb_whatis_report_object(w, cur, vs->vs_start, "");
2103
2104 /*
2105 * If we're not printing it separately, provide the vmem_seg
2106 * pointer if it has a stack trace.
2107 */
2108 if ((mdb_whatis_flags(w) & WHATIS_QUIET) &&
2109 ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0 ||
2110 (vs->vs_type == VMEM_ALLOC && vs->vs_depth != 0))) {
2111 mdb_printf("vmem_seg %p ", addr);
2112 }
2113
2114 mdb_printf("%s from %s vmem arena",
2115 (vs->vs_type == VMEM_ALLOC) ? "allocated" : "freed",
2116 wi->wi_vmem->vm_name);
2117
2118 if (!(mdb_whatis_flags(w) & WHATIS_QUIET))
2119 whatis_call_printer(vmem_seg, addr);
2120 else
2121 mdb_printf("\n");
2122 }
2123
2124 return (WHATIS_WALKRET(w));
2125 }
2126
2127 static int
2128 whatis_walk_vmem(uintptr_t addr, const vmem_t *vmem, whatis_info_t *wi)
2129 {
2130 mdb_whatis_t *w = wi->wi_w;
2131 const char *nm = vmem->vm_name;
2132 wi->wi_vmem = vmem;
2133
2134 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2135 mdb_printf("Searching vmem arena %s...\n", nm);
2136
2137 if (mdb_pwalk("vmem_seg",
2138 (mdb_walk_cb_t)whatis_walk_seg, wi, addr) == -1) {
2139 mdb_warn("can't walk vmem seg for %p", addr);
2140 return (WALK_NEXT);
2141 }
2142
2143 return (WHATIS_WALKRET(w));
2144 }
2145
2146 /*ARGSUSED*/
2147 static int
2148 whatis_walk_slab(uintptr_t saddr, const umem_slab_t *sp, whatis_info_t *wi)
2149 {
2150 mdb_whatis_t *w = wi->wi_w;
2151
2152 /* It must overlap with the slab data, or it's not interesting */
2153 if (mdb_whatis_overlaps(w,
2154 (uintptr_t)sp->slab_base, wi->wi_slab_size)) {
2155 wi->wi_slab_found++;
2156 return (WALK_DONE);
2157 }
2158 return (WALK_NEXT);
2159 }
2160
2161 static int
2162 whatis_walk_cache(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2163 {
2164 mdb_whatis_t *w = wi->wi_w;
2165 char *walk, *freewalk;
2166 mdb_walk_cb_t func;
2167 int do_bufctl;
2168
2169 /* Override the '-b' flag as necessary */
2170 if (!(c->cache_flags & UMF_HASH))
2171 do_bufctl = FALSE; /* no bufctls to walk */
2172 else if (c->cache_flags & UMF_AUDIT)
2173 do_bufctl = TRUE; /* we always want debugging info */
2174 else
2175 do_bufctl = ((mdb_whatis_flags(w) & WHATIS_BUFCTL) != 0);
2176
2177 if (do_bufctl) {
2178 walk = "bufctl";
2179 freewalk = "freectl";
2180 func = (mdb_walk_cb_t)whatis_walk_bufctl;
2181 } else {
2182 walk = "umem";
2183 freewalk = "freemem";
2184 func = (mdb_walk_cb_t)whatis_walk_umem;
2185 }
2186
2187 wi->wi_cache = c;
2188
2189 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2190 mdb_printf("Searching %s...\n", c->cache_name);
2191
2192 /*
2193 * If more than two buffers live on each slab, figure out if we're
2194 * interested in anything in any slab before doing the more expensive
2195 * umem/freemem (bufctl/freectl) walkers.
2196 */
2197 wi->wi_slab_size = c->cache_slabsize - c->cache_maxcolor;
2198 if (!(c->cache_flags & UMF_HASH))
2199 wi->wi_slab_size -= sizeof (umem_slab_t);
2200
2201 if ((wi->wi_slab_size / c->cache_chunksize) > 2) {
2202 wi->wi_slab_found = 0;
2203 if (mdb_pwalk("umem_slab", (mdb_walk_cb_t)whatis_walk_slab, wi,
2204 addr) == -1) {
2205 mdb_warn("can't find umem_slab walker");
2206 return (WALK_DONE);
2207 }
2208 if (wi->wi_slab_found == 0)
2209 return (WALK_NEXT);
2210 }
2211
2212 wi->wi_freemem = FALSE;
2213 if (mdb_pwalk(walk, func, wi, addr) == -1) {
2214 mdb_warn("can't find %s walker", walk);
2215 return (WALK_DONE);
2216 }
2217
2218 if (mdb_whatis_done(w))
2219 return (WALK_DONE);
2220
2221 /*
2222 * We have searched for allocated memory; now search for freed memory.
2223 */
2224 if (mdb_whatis_flags(w) & WHATIS_VERBOSE)
2225 mdb_printf("Searching %s for free memory...\n", c->cache_name);
2226
2227 wi->wi_freemem = TRUE;
2228
2229 if (mdb_pwalk(freewalk, func, wi, addr) == -1) {
2230 mdb_warn("can't find %s walker", freewalk);
2231 return (WALK_DONE);
2232 }
2233
2234 return (WHATIS_WALKRET(w));
2235 }
2236
2237 static int
2238 whatis_walk_touch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2239 {
2240 if (c->cache_arena == wi->wi_msb_arena ||
2241 (c->cache_cflags & UMC_NOTOUCH))
2242 return (WALK_NEXT);
2243
2244 return (whatis_walk_cache(addr, c, wi));
2245 }
2246
2247 static int
2248 whatis_walk_metadata(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2249 {
2250 if (c->cache_arena != wi->wi_msb_arena)
2251 return (WALK_NEXT);
2252
2253 return (whatis_walk_cache(addr, c, wi));
2254 }
2255
2256 static int
2257 whatis_walk_notouch(uintptr_t addr, const umem_cache_t *c, whatis_info_t *wi)
2258 {
2259 if (c->cache_arena == wi->wi_msb_arena ||
2260 !(c->cache_cflags & UMC_NOTOUCH))
2261 return (WALK_NEXT);
2262
2263 return (whatis_walk_cache(addr, c, wi));
2264 }
2265
2266 /*ARGSUSED*/
2267 static int
2268 whatis_run_umem(mdb_whatis_t *w, void *ignored)
2269 {
2270 whatis_info_t wi;
2271
2272 bzero(&wi, sizeof (wi));
2273 wi.wi_w = w;
2274
2275 /* umem's metadata is allocated from the umem_internal_arena */
2276 if (umem_readvar(&wi.wi_msb_arena, "umem_internal_arena") == -1)
2277 mdb_warn("unable to readvar \"umem_internal_arena\"");
2278
2279 /*
2280 * We process umem caches in the following order:
2281 *
2282 * non-UMC_NOTOUCH, non-metadata (typically the most interesting)
2283 * metadata (can be huge with UMF_AUDIT)
2284 * UMC_NOTOUCH, non-metadata (see umem_walk_all())
2285 */
2286 if (mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_touch,
2287 &wi) == -1 ||
2288 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_metadata,
2289 &wi) == -1 ||
2290 mdb_walk("umem_cache", (mdb_walk_cb_t)whatis_walk_notouch,
2291 &wi) == -1) {
2292 mdb_warn("couldn't find umem_cache walker");
2293 return (1);
2294 }
2295 return (0);
2296 }
2297
2298 /*ARGSUSED*/
2299 static int
2300 whatis_run_vmem(mdb_whatis_t *w, void *ignored)
2301 {
2302 whatis_info_t wi;
2303
2304 bzero(&wi, sizeof (wi));
2305 wi.wi_w = w;
2306
2307 if (mdb_walk("vmem_postfix",
2308 (mdb_walk_cb_t)whatis_walk_vmem, &wi) == -1) {
2309 mdb_warn("couldn't find vmem_postfix walker");
2310 return (1);
2311 }
2312 return (0);
2313 }
2314
2315 int
2316 umem_init(void)
2317 {
2318 mdb_walker_t w = {
2319 "umem_cache", "walk list of umem caches", umem_cache_walk_init,
2320 umem_cache_walk_step, umem_cache_walk_fini
2321 };
2322
2323 if (mdb_add_walker(&w) == -1) {
2324 mdb_warn("failed to add umem_cache walker");
2325 return (-1);
2326 }
2327
2328 if (umem_update_variables() == -1)
2329 return (-1);
2330
2331 /* install a callback so that our variables are always up-to-date */
2332 (void) mdb_callback_add(MDB_CALLBACK_STCHG, umem_statechange_cb, NULL);
2333 umem_statechange_cb(NULL);
2334
2335 /*
2336 * Register our ::whatis callbacks.
2337 */
2338 mdb_whatis_register("umem", whatis_run_umem, NULL,
2339 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2340 mdb_whatis_register("vmem", whatis_run_vmem, NULL,
2341 WHATIS_PRIO_ALLOCATOR, WHATIS_REG_NO_ID);
2342
2343 return (0);
2344 }
2345
2346 typedef struct umem_log_cpu {
2347 uintptr_t umc_low;
2348 uintptr_t umc_high;
2349 } umem_log_cpu_t;
2350
2351 int
2352 umem_log_walk(uintptr_t addr, const umem_bufctl_audit_t *b, umem_log_cpu_t *umc)
2353 {
2354 int i;
2355
2356 for (i = 0; i < umem_max_ncpus; i++) {
2357 if (addr >= umc[i].umc_low && addr < umc[i].umc_high)
2358 break;
2359 }
2360
2361 if (i == umem_max_ncpus)
2362 mdb_printf("   ");
2363 else
2364 mdb_printf("%3d", i);
2365
2366 mdb_printf(" %0?p %0?p %16llx %0?p\n", addr, b->bc_addr,
2367 b->bc_timestamp, b->bc_thread);
2368
2369 return (WALK_NEXT);
2370 }
2371
2372 /*ARGSUSED*/
2373 int
2374 umem_log(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2375 {
2376 umem_log_header_t lh;
2377 umem_cpu_log_header_t clh;
2378 uintptr_t lhp, clhp;
2379 umem_log_cpu_t *umc;
2380 int i;
2381
2382 if (umem_readvar(&lhp, "umem_transaction_log") == -1) {
2383 mdb_warn("failed to read 'umem_transaction_log'");
2384 return (DCMD_ERR);
2385 }
2386
2387 if (lhp == NULL) {
2388 mdb_warn("no umem transaction log\n");
2389 return (DCMD_ERR);
2390 }
2391
2392 if (mdb_vread(&lh, sizeof (umem_log_header_t), lhp) == -1) {
2393 mdb_warn("failed to read log header at %p", lhp);
2394 return (DCMD_ERR);
2395 }
2396
2397 clhp = lhp + ((uintptr_t)&lh.lh_cpu[0] - (uintptr_t)&lh);
2398
2399 umc = mdb_zalloc(sizeof (umem_log_cpu_t) * umem_max_ncpus,
2400 UM_SLEEP | UM_GC);
2401
2402 for (i = 0; i < umem_max_ncpus; i++) {
2403 if (mdb_vread(&clh, sizeof (clh), clhp) == -1) {
2404 mdb_warn("cannot read cpu %d's log header at %p",
2405 i, clhp);
2406 return (DCMD_ERR);
2407 }
2408
2409 umc[i].umc_low = clh.clh_chunk * lh.lh_chunksize +
2410 (uintptr_t)lh.lh_base;
2411 umc[i].umc_high = (uintptr_t)clh.clh_current;
2412
2413 clhp += sizeof (umem_cpu_log_header_t);
2414 }
2415
2416 if (DCMD_HDRSPEC(flags)) {
2417 mdb_printf("%3s %-?s %-?s %16s %-?s\n", "CPU", "ADDR",
2418 "BUFADDR", "TIMESTAMP", "THREAD");
2419 }
2420
2421 /*
2422 * If we have been passed an address, we'll just print out that
2423 * log entry.
2424 */
2425 if (flags & DCMD_ADDRSPEC) {
2426 umem_bufctl_audit_t *bp;
2427 UMEM_LOCAL_BUFCTL_AUDIT(&bp);
2428
2429 if (mdb_vread(bp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2430 mdb_warn("failed to read bufctl at %p", addr);
2431 return (DCMD_ERR);
2432 }
2433
2434 (void) umem_log_walk(addr, bp, umc);
2435
2436 return (DCMD_OK);
2437 }
2438
2439 if (mdb_walk("umem_log", (mdb_walk_cb_t)umem_log_walk, umc) == -1) {
2440 mdb_warn("can't find umem log walker");
2441 return (DCMD_ERR);
2442 }
2443
2444 return (DCMD_OK);
2445 }
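
/*
 * Usage sketch for the dcmd above (the address below is hypothetical):
 *
 *	> ::umem_log			! dump every transaction log entry,
 *					! tagged with the CPU that logged it
 *	> 0x81fa4a8::umem_log		! decode a single log entry/bufctl
 */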
2446
2447 typedef struct bufctl_history_cb {
2448 int bhc_flags;
2449 int bhc_argc;
2450 const mdb_arg_t *bhc_argv;
2451 int bhc_ret;
2452 } bufctl_history_cb_t;
2453
2454 /*ARGSUSED*/
2455 static int
2456 bufctl_history_callback(uintptr_t addr, const void *ign, void *arg)
2457 {
2458 bufctl_history_cb_t *bhc = arg;
2459
2460 bhc->bhc_ret =
2461 bufctl(addr, bhc->bhc_flags, bhc->bhc_argc, bhc->bhc_argv);
2462
2463 bhc->bhc_flags &= ~DCMD_LOOPFIRST;
2464
2465 return ((bhc->bhc_ret == DCMD_OK)? WALK_NEXT : WALK_DONE);
2466 }
2467
2468 void
2469 bufctl_help(void)
2470 {
2471 mdb_printf("%s\n",
2472 "Display the contents of umem_bufctl_audit_ts, with optional filtering.\n");
2473 mdb_dec_indent(2);
2474 mdb_printf("%<b>OPTIONS%</b>\n");
2475 mdb_inc_indent(2);
2476 mdb_printf("%s",
2477 " -v Display the full content of the bufctl, including its stack trace\n"
2478 " -h retrieve the bufctl's transaction history, if available\n"
2479 " -a addr\n"
2480 " filter out bufctls not involving the buffer at addr\n"
2481 " -c caller\n"
2482 " filter out bufctls without the function/PC in their stack trace\n"
2483 " -e earliest\n"
2484 " filter out bufctls timestamped before earliest\n"
2485 " -l latest\n"
2486 " filter out bufctls timestamped after latest\n"
2487 " -t thread\n"
2488 " filter out bufctls not involving thread\n");
2489 }
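
/*
 * Illustrative ::bufctl invocations (the addresses, thread id, and cache
 * address below are hypothetical; -a/-c/-t/-e/-l accept any expression that
 * evaluates to the desired value):
 *
 *	> 0x81ef028::bufctl -v			! full bufctl, with stack trace
 *	> 0x81ef028::bufctl -h			! include its transaction history
 *	> 0x8077a90::walk bufctl | ::bufctl -t 0t5
 *						! a cache's bufctls, filtered
 *						! down to those touched by
 *						! thread 5
 */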
2490
2491 int
2492 bufctl(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2493 {
2494 uint_t verbose = FALSE;
2495 uint_t history = FALSE;
2496 uint_t in_history = FALSE;
2497 uintptr_t caller = NULL, thread = NULL;
2498 uintptr_t laddr, haddr, baddr = NULL;
2499 hrtime_t earliest = 0, latest = 0;
2500 int i, depth;
2501 char c[MDB_SYM_NAMLEN];
2502 GElf_Sym sym;
2503 umem_bufctl_audit_t *bcp;
2504 UMEM_LOCAL_BUFCTL_AUDIT(&bcp);
2505
2506 if (mdb_getopts(argc, argv,
2507 'v', MDB_OPT_SETBITS, TRUE, &verbose,
2508 'h', MDB_OPT_SETBITS, TRUE, &history,
2509 'H', MDB_OPT_SETBITS, TRUE, &in_history, /* internal */
2510 'c', MDB_OPT_UINTPTR, &caller,
2511 't', MDB_OPT_UINTPTR, &thread,
2512 'e', MDB_OPT_UINT64, &earliest,
2513 'l', MDB_OPT_UINT64, &latest,
2514 'a', MDB_OPT_UINTPTR, &baddr, NULL) != argc)
2515 return (DCMD_USAGE);
2516
2517 if (!(flags & DCMD_ADDRSPEC))
2518 return (DCMD_USAGE);
2519
2520 if (in_history && !history)
2521 return (DCMD_USAGE);
2522
2523 if (history && !in_history) {
2524 mdb_arg_t *nargv = mdb_zalloc(sizeof (*nargv) * (argc + 1),
2525 UM_SLEEP | UM_GC);
2526 bufctl_history_cb_t bhc;
2527
2528 nargv[0].a_type = MDB_TYPE_STRING;
2529 nargv[0].a_un.a_str = "-H"; /* prevent recursion */
2530
2531 for (i = 0; i < argc; i++)
2532 nargv[i + 1] = argv[i];
2533
2534 /*
2535 * When in history mode, we treat each element as if it
2536 * were in a separate loop, so that the headers group
2537 * bufctls with similar histories.
2538 */
2539 bhc.bhc_flags = flags | DCMD_LOOP | DCMD_LOOPFIRST;
2540 bhc.bhc_argc = argc + 1;
2541 bhc.bhc_argv = nargv;
2542 bhc.bhc_ret = DCMD_OK;
2543
2544 if (mdb_pwalk("bufctl_history", bufctl_history_callback, &bhc,
2545 addr) == -1) {
2546 mdb_warn("unable to walk bufctl_history");
2547 return (DCMD_ERR);
2548 }
2549
2550 if (bhc.bhc_ret == DCMD_OK && !(flags & DCMD_PIPE_OUT))
2551 mdb_printf("\n");
2552
2553 return (bhc.bhc_ret);
2554 }
2555
2556 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
2557 if (verbose) {
2558 mdb_printf("%16s %16s %16s %16s\n"
2559 "%<u>%16s %16s %16s %16s%</u>\n",
2560 "ADDR", "BUFADDR", "TIMESTAMP", "THREAD",
2561 "", "CACHE", "LASTLOG", "CONTENTS");
2562 } else {
2563 mdb_printf("%<u>%-?s %-?s %-12s %5s %s%</u>\n",
2564 "ADDR", "BUFADDR", "TIMESTAMP", "THRD", "CALLER");
2565 }
2566 }
2567
2568 if (mdb_vread(bcp, UMEM_BUFCTL_AUDIT_SIZE, addr) == -1) {
2569 mdb_warn("couldn't read bufctl at %p", addr);
2570 return (DCMD_ERR);
2571 }
2572
2573 /*
2574 * Guard against bogus bc_depth in case the bufctl is corrupt or
2575 * the address does not really refer to a bufctl.
2576 */
2577 depth = MIN(bcp->bc_depth, umem_stack_depth);
2578
2579 if (caller != NULL) {
2580 laddr = caller;
2581 haddr = caller + sizeof (caller);
2582
2583 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c, sizeof (c),
2584 &sym) != -1 && caller == (uintptr_t)sym.st_value) {
2585 /*
2586 * We were provided an exact symbol value; any
2587 * address in the function is valid.
2588 */
2589 laddr = (uintptr_t)sym.st_value;
2590 haddr = (uintptr_t)sym.st_value + sym.st_size;
2591 }
2592
2593 for (i = 0; i < depth; i++)
2594 if (bcp->bc_stack[i] >= laddr &&
2595 bcp->bc_stack[i] < haddr)
2596 break;
2597
2598 if (i == depth)
2599 return (DCMD_OK);
2600 }
2601
2602 if (thread != NULL && (uintptr_t)bcp->bc_thread != thread)
2603 return (DCMD_OK);
2604
2605 if (earliest != 0 && bcp->bc_timestamp < earliest)
2606 return (DCMD_OK);
2607
2608 if (latest != 0 && bcp->bc_timestamp > latest)
2609 return (DCMD_OK);
2610
2611 if (baddr != 0 && (uintptr_t)bcp->bc_addr != baddr)
2612 return (DCMD_OK);
2613
2614 if (flags & DCMD_PIPE_OUT) {
2615 mdb_printf("%#r\n", addr);
2616 return (DCMD_OK);
2617 }
2618
2619 if (verbose) {
2620 mdb_printf(
2621 "%<b>%16p%</b> %16p %16llx %16d\n"
2622 "%16s %16p %16p %16p\n",
2623 addr, bcp->bc_addr, bcp->bc_timestamp, bcp->bc_thread,
2624 "", bcp->bc_cache, bcp->bc_lastlog, bcp->bc_contents);
2625
2626 mdb_inc_indent(17);
2627 for (i = 0; i < depth; i++)
2628 mdb_printf("%a\n", bcp->bc_stack[i]);
2629 mdb_dec_indent(17);
2630 mdb_printf("\n");
2631 } else {
2632 mdb_printf("%0?p %0?p %12llx %5d", addr, bcp->bc_addr,
2633 bcp->bc_timestamp, bcp->bc_thread);
2634
2635 for (i = 0; i < depth; i++) {
2636 if (mdb_lookup_by_addr(bcp->bc_stack[i],
2637 MDB_SYM_FUZZY, c, sizeof (c), &sym) == -1)
2638 continue;
2639 if (is_umem_sym(c, "umem_"))
2640 continue;
2641 mdb_printf(" %a\n", bcp->bc_stack[i]);
2642 break;
2643 }
2644
2645 if (i >= depth)
2646 mdb_printf("\n");
2647 }
2648
2649 return (DCMD_OK);
2650 }
2651
2652 /*ARGSUSED*/
2653 int
2654 bufctl_audit(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2655 {
2656 mdb_arg_t a;
2657
2658 if (!(flags & DCMD_ADDRSPEC))
2659 return (DCMD_USAGE);
2660
2661 if (argc != 0)
2662 return (DCMD_USAGE);
2663
2664 a.a_type = MDB_TYPE_STRING;
2665 a.a_un.a_str = "-v";
2666
2667 return (bufctl(addr, flags, 1, &a));
2668 }
2669
2670 typedef struct umem_verify {
2671 uint64_t *umv_buf; /* buffer to read cache contents into */
2672 size_t umv_size; /* number of bytes in umv_buf */
2673 int umv_corruption; /* > 0 if corruption found. */
2674 int umv_besilent; /* if set, don't report individual corruption sites */
2675 struct umem_cache umv_cache; /* the cache we're operating on */
2676 } umem_verify_t;
2677
2678 /*
2679 * verify_pattern()
2680 * verify that buf is filled with the pattern pat.
2681 */
2682 static int64_t
2683 verify_pattern(uint64_t *buf_arg, size_t size, uint64_t pat)
2684 {
2685 /*LINTED*/
2686 uint64_t *bufend = (uint64_t *)((char *)buf_arg + size);
2687 uint64_t *buf;
2688
2689 for (buf = buf_arg; buf < bufend; buf++)
2690 if (*buf != pat)
2691 return ((uintptr_t)buf - (uintptr_t)buf_arg);
2692 return (-1);
2693 }
2694
2695 /*
2696 * verify_buftag()
2697 * verify that btp->bt_bxstat == (bcp ^ pat)
2698 */
2699 static int
2700 verify_buftag(umem_buftag_t *btp, uintptr_t pat)
2701 {
2702 return (btp->bt_bxstat == ((intptr_t)btp->bt_bufctl ^ pat) ? 0 : -1);
2703 }
2704
2705 /*
2706 * verify_free()
2707 * verify the integrity of a free block of memory by checking
2708 * that it is filled with 0xdeadbeef and that its buftag is sane.
2709 */
2710 /*ARGSUSED1*/
2711 static int
2712 verify_free(uintptr_t addr, const void *data, void *private)
2713 {
2714 umem_verify_t *umv = (umem_verify_t *)private;
2715 uint64_t *buf = umv->umv_buf; /* buf to validate */
2716 int64_t corrupt; /* corruption offset */
2717 umem_buftag_t *buftagp; /* ptr to buftag */
2718 umem_cache_t *cp = &umv->umv_cache;
2719 int besilent = umv->umv_besilent;
2720
2721 /*LINTED*/
2722 buftagp = UMEM_BUFTAG(cp, buf);
2723
2724 /*
2725 * Read the buffer to check.
2726 */
2727 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2728 if (!besilent)
2729 mdb_warn("couldn't read %p", addr);
2730 return (WALK_NEXT);
2731 }
2732
2733 if ((corrupt = verify_pattern(buf, cp->cache_verify,
2734 UMEM_FREE_PATTERN)) >= 0) {
2735 if (!besilent)
2736 mdb_printf("buffer %p (free) seems corrupted, at %p\n",
2737 addr, (uintptr_t)addr + corrupt);
2738 goto corrupt;
2739 }
2740
2741 if ((cp->cache_flags & UMF_HASH) &&
2742 buftagp->bt_redzone != UMEM_REDZONE_PATTERN) {
2743 if (!besilent)
2744 mdb_printf("buffer %p (free) seems to "
2745 "have a corrupt redzone pattern\n", addr);
2746 goto corrupt;
2747 }
2748
2749 /*
2750 * confirm bufctl pointer integrity.
2751 */
2752 if (verify_buftag(buftagp, UMEM_BUFTAG_FREE) == -1) {
2753 if (!besilent)
2754 mdb_printf("buffer %p (free) has a corrupt "
2755 "buftag\n", addr);
2756 goto corrupt;
2757 }
2758
2759 return (WALK_NEXT);
2760 corrupt:
2761 umv->umv_corruption++;
2762 return (WALK_NEXT);
2763 }
2764
2765 /*
2766 * verify_alloc()
2767 * Verify that the buftag of an allocated buffer makes sense with respect
2768 * to the buffer.
2769 */
2770 /*ARGSUSED1*/
2771 static int
2772 verify_alloc(uintptr_t addr, const void *data, void *private)
2773 {
2774 umem_verify_t *umv = (umem_verify_t *)private;
2775 umem_cache_t *cp = &umv->umv_cache;
2776 uint64_t *buf = umv->umv_buf; /* buf to validate */
2777 /*LINTED*/
2778 umem_buftag_t *buftagp = UMEM_BUFTAG(cp, buf);
2779 uint32_t *ip = (uint32_t *)buftagp;
2780 uint8_t *bp = (uint8_t *)buf;
2781 int looks_ok = 0, size_ok = 1; /* flags for finding corruption */
2782 int besilent = umv->umv_besilent;
2783
2784 /*
2785 * Read the buffer to check.
2786 */
2787 if (mdb_vread(buf, umv->umv_size, addr) == -1) {
2788 if (!besilent)
2789 mdb_warn("couldn't read %p", addr);
2790 return (WALK_NEXT);
2791 }
2792
2793 /*
2794 * There are two cases to handle:
2795 * 1. If the buf was alloc'd using umem_cache_alloc, it will have
2796 * 0xfeedfacefeedface at the end of it
2797 * 2. If the buf was alloc'd using umem_alloc, it will have
2798 * 0xbb just past the end of the region in use. At the buftag,
2799 * it will have 0xfeedface (or, if the whole buffer is in use,
2800 * 0xfeedface & bb000000 or 0xfeedfacf & 000000bb depending on
2801 * endianness), followed by 32 bits containing the offset of the
2802 * 0xbb byte in the buffer.
2803 *
2804 * Finally, the two 32-bit words that comprise the second half of the
2805 * buftag should xor to UMEM_BUFTAG_ALLOC
2806 */
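/*
 * As a concrete (illustrative) instance of case 2: for a umem_alloc(10)
 * buffer, the 0xbb byte sits at offset 10 in the buffer, so ip[1] should
 * decode (via UMEM_SIZE_DECODE) back to 10 and bp[10] should equal
 * UMEM_REDZONE_BYTE; anything else trips the size_ok/looks_ok checks below.
 */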
2807
2808 if (buftagp->bt_redzone == UMEM_REDZONE_PATTERN)
2809 looks_ok = 1;
2810 else if (!UMEM_SIZE_VALID(ip[1]))
2811 size_ok = 0;
2812 else if (bp[UMEM_SIZE_DECODE(ip[1])] == UMEM_REDZONE_BYTE)
2813 looks_ok = 1;
2814 else
2815 size_ok = 0;
2816
2817 if (!size_ok) {
2818 if (!besilent)
2819 mdb_printf("buffer %p (allocated) has a corrupt "
2820 "redzone size encoding\n", addr);
2821 goto corrupt;
2822 }
2823
2824 if (!looks_ok) {
2825 if (!besilent)
2826 mdb_printf("buffer %p (allocated) has a corrupt "
2827 "redzone signature\n", addr);
2828 goto corrupt;
2829 }
2830
2831 if (verify_buftag(buftagp, UMEM_BUFTAG_ALLOC) == -1) {
2832 if (!besilent)
2833 mdb_printf("buffer %p (allocated) has a "
2834 "corrupt buftag\n", addr);
2835 goto corrupt;
2836 }
2837
2838 return (WALK_NEXT);
2839 corrupt:
2840 umv->umv_corruption++;
2841 return (WALK_NEXT);
2842 }
2843
2844 /*ARGSUSED2*/
2845 int
2846 umem_verify(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
2847 {
2848 if (flags & DCMD_ADDRSPEC) {
2849 int check_alloc = 0, check_free = 0;
2850 umem_verify_t umv;
2851
2852 if (mdb_vread(&umv.umv_cache, sizeof (umv.umv_cache),
2853 addr) == -1) {
2854 mdb_warn("couldn't read umem_cache %p", addr);
2855 return (DCMD_ERR);
2856 }
2857
2858 umv.umv_size = umv.umv_cache.cache_buftag +
2859 sizeof (umem_buftag_t);
2860 umv.umv_buf = mdb_alloc(umv.umv_size, UM_SLEEP | UM_GC);
2861 umv.umv_corruption = 0;
2862
2863 if ((umv.umv_cache.cache_flags & UMF_REDZONE)) {
2864 check_alloc = 1;
2865 if (umv.umv_cache.cache_flags & UMF_DEADBEEF)
2866 check_free = 1;
2867 } else {
2868 if (!(flags & DCMD_LOOP)) {
2869 mdb_warn("cache %p (%s) does not have "
2870 "redzone checking enabled\n", addr,
2871 umv.umv_cache.cache_name);
2872 }
2873 return (DCMD_ERR);
2874 }
2875
2876 if (flags & DCMD_LOOP) {
2877 /*
2878 * table mode, don't print out every corrupt buffer
2879 */
2880 umv.umv_besilent = 1;
2881 } else {
2882 mdb_printf("Summary for cache '%s'\n",
2883 umv.umv_cache.cache_name);
2884 mdb_inc_indent(2);
2885 umv.umv_besilent = 0;
2886 }
2887
2888 if (check_alloc)
2889 (void) mdb_pwalk("umem", verify_alloc, &umv, addr);
2890 if (check_free)
2891 (void) mdb_pwalk("freemem", verify_free, &umv, addr);
2892
2893 if (flags & DCMD_LOOP) {
2894 if (umv.umv_corruption == 0) {
2895 mdb_printf("%-*s %?p clean\n",
2896 UMEM_CACHE_NAMELEN,
2897 umv.umv_cache.cache_name, addr);
2898 } else {
2899 char *s = ""; /* optional s in "buffer[s]" */
2900 if (umv.umv_corruption > 1)
2901 s = "s";
2902
2903 mdb_printf("%-*s %?p %d corrupt buffer%s\n",
2904 UMEM_CACHE_NAMELEN,
2905 umv.umv_cache.cache_name, addr,
2906 umv.umv_corruption, s);
2907 }
2908 } else {
2909 /*
2910 * This is the more verbose mode, when the user has
2911 * typed addr::umem_verify. If the cache was clean,
2912 * nothing will have yet been printed. So say something.
2913 */
2914 if (umv.umv_corruption == 0)
2915 mdb_printf("clean\n");
2916
2917 mdb_dec_indent(2);
2918 }
2919 } else {
2920 /*
2921 * If the user didn't specify a cache to verify, we'll walk all
2922 * umem_cache's, specifying ourself as a callback for each...
2923 * this is the equivalent of '::walk umem_cache .::umem_verify'
2924 */
2925 mdb_printf("%<u>%-*s %-?s %-20s%</u>\n", UMEM_CACHE_NAMELEN,
2926 "Cache Name", "Addr", "Cache Integrity");
2927 (void) (mdb_walk_dcmd("umem_cache", "umem_verify", 0, NULL));
2928 }
2929
2930 return (DCMD_OK);
2931 }
2932
2933 typedef struct vmem_node {
2934 struct vmem_node *vn_next;
2935 struct vmem_node *vn_parent;
2936 struct vmem_node *vn_sibling;
2937 struct vmem_node *vn_children;
2938 uintptr_t vn_addr;
2939 int vn_marked;
2940 vmem_t vn_vmem;
2941 } vmem_node_t;
2942
2943 typedef struct vmem_walk {
2944 vmem_node_t *vw_root;
2945 vmem_node_t *vw_current;
2946 } vmem_walk_t;
2947
2948 int
2949 vmem_walk_init(mdb_walk_state_t *wsp)
2950 {
2951 uintptr_t vaddr, paddr;
2952 vmem_node_t *head = NULL, *root = NULL, *current = NULL, *parent, *vp;
2953 vmem_walk_t *vw;
2954
2955 if (umem_readvar(&vaddr, "vmem_list") == -1) {
2956 mdb_warn("couldn't read 'vmem_list'");
2957 return (WALK_ERR);
2958 }
2959
2960 while (vaddr != NULL) {
2961 vp = mdb_zalloc(sizeof (vmem_node_t), UM_SLEEP);
2962 vp->vn_addr = vaddr;
2963 vp->vn_next = head;
2964 head = vp;
2965
2966 if (vaddr == wsp->walk_addr)
2967 current = vp;
2968
2969 if (mdb_vread(&vp->vn_vmem, sizeof (vmem_t), vaddr) == -1) {
2970 mdb_warn("couldn't read vmem_t at %p", vaddr);
2971 goto err;
2972 }
2973
2974 vaddr = (uintptr_t)vp->vn_vmem.vm_next;
2975 }
2976
2977 for (vp = head; vp != NULL; vp = vp->vn_next) {
2978
2979 if ((paddr = (uintptr_t)vp->vn_vmem.vm_source) == NULL) {
2980 vp->vn_sibling = root;
2981 root = vp;
2982 continue;
2983 }
2984
2985 for (parent = head; parent != NULL; parent = parent->vn_next) {
2986 if (parent->vn_addr != paddr)
2987 continue;
2988 vp->vn_sibling = parent->vn_children;
2989 parent->vn_children = vp;
2990 vp->vn_parent = parent;
2991 break;
2992 }
2993
2994 if (parent == NULL) {
2995 mdb_warn("couldn't find %p's parent (%p)\n",
2996 vp->vn_addr, paddr);
2997 goto err;
2998 }
2999 }
3000
3001 vw = mdb_zalloc(sizeof (vmem_walk_t), UM_SLEEP);
3002 vw->vw_root = root;
3003
3004 if (current != NULL)
3005 vw->vw_current = current;
3006 else
3007 vw->vw_current = root;
3008
3009 wsp->walk_data = vw;
3010 return (WALK_NEXT);
3011 err:
3012 for (vp = head; head != NULL; vp = head) {
3013 head = vp->vn_next;
3014 mdb_free(vp, sizeof (vmem_node_t));
3015 }
3016
3017 return (WALK_ERR);
3018 }
3019
3020 int
3021 vmem_walk_step(mdb_walk_state_t *wsp)
3022 {
3023 vmem_walk_t *vw = wsp->walk_data;
3024 vmem_node_t *vp;
3025 int rval;
3026
3027 if ((vp = vw->vw_current) == NULL)
3028 return (WALK_DONE);
3029
3030 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3031
3032 if (vp->vn_children != NULL) {
3033 vw->vw_current = vp->vn_children;
3034 return (rval);
3035 }
3036
3037 do {
3038 vw->vw_current = vp->vn_sibling;
3039 vp = vp->vn_parent;
3040 } while (vw->vw_current == NULL && vp != NULL);
3041
3042 return (rval);
3043 }
3044
3045 /*
3046 * The "vmem_postfix" walk walks the vmem arenas in post-fix order; all
3047 * children are visited before their parent. We perform the postfix walk
3048 * iteratively (rather than recursively) to allow mdb to regain control
3049 * after each callback.
3050 */
3051 int
3052 vmem_postfix_walk_step(mdb_walk_state_t *wsp)
3053 {
3054 vmem_walk_t *vw = wsp->walk_data;
3055 vmem_node_t *vp = vw->vw_current;
3056 int rval;
3057
3058 /*
3059 * If this node is marked, then we know that we have already visited
3060 * all of its children. If the node has any siblings, they need to
3061 * be visited next; otherwise, we need to visit the parent. Note
3062 * that vp->vn_marked will only be zero on the first invocation of
3063 * the step function.
3064 */
3065 if (vp->vn_marked) {
3066 if (vp->vn_sibling != NULL)
3067 vp = vp->vn_sibling;
3068 else if (vp->vn_parent != NULL)
3069 vp = vp->vn_parent;
3070 else {
3071 /*
3072 * We have neither a parent, nor a sibling, and we
3073 * have already been visited; we're done.
3074 */
3075 return (WALK_DONE);
3076 }
3077 }
3078
3079 /*
3080 * Before we visit this node, visit its children.
3081 */
3082 while (vp->vn_children != NULL && !vp->vn_children->vn_marked)
3083 vp = vp->vn_children;
3084
3085 vp->vn_marked = 1;
3086 vw->vw_current = vp;
3087 rval = wsp->walk_callback(vp->vn_addr, &vp->vn_vmem, wsp->walk_cbdata);
3088
3089 return (rval);
3090 }
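
/*
 * For example (a sketch; arena names are hypothetical): given a parent
 * arena P with child arenas C1 and C2, the postfix walk visits C1 and C2
 * before P, whereas the plain "vmem" walk above visits P first. Visiting
 * children first lets callers such as ::whatis attribute an address to the
 * most specific arena that contains it.
 */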
3091
3092 void
3093 vmem_walk_fini(mdb_walk_state_t *wsp)
3094 {
3095 vmem_walk_t *vw = wsp->walk_data;
3096 vmem_node_t *root = vw->vw_root;
3097 int done;
3098
3099 if (root == NULL)
3100 return;
3101
3102 if ((vw->vw_root = root->vn_children) != NULL)
3103 vmem_walk_fini(wsp);
3104
3105 vw->vw_root = root->vn_sibling;
3106 done = (root->vn_sibling == NULL && root->vn_parent == NULL);
3107 mdb_free(root, sizeof (vmem_node_t));
3108
3109 if (done) {
3110 mdb_free(vw, sizeof (vmem_walk_t));
3111 } else {
3112 vmem_walk_fini(wsp);
3113 }
3114 }
3115
3116 typedef struct vmem_seg_walk {
3117 uint8_t vsw_type;
3118 uintptr_t vsw_start;
3119 uintptr_t vsw_current;
3120 } vmem_seg_walk_t;
3121
3122 /*ARGSUSED*/
3123 int
3124 vmem_seg_walk_common_init(mdb_walk_state_t *wsp, uint8_t type, char *name)
3125 {
3126 vmem_seg_walk_t *vsw;
3127
3128 if (wsp->walk_addr == NULL) {
3129 mdb_warn("vmem_%s does not support global walks\n", name);
3130 return (WALK_ERR);
3131 }
3132
3133 wsp->walk_data = vsw = mdb_alloc(sizeof (vmem_seg_walk_t), UM_SLEEP);
3134
3135 vsw->vsw_type = type;
3136 vsw->vsw_start = wsp->walk_addr + OFFSETOF(vmem_t, vm_seg0);
3137 vsw->vsw_current = vsw->vsw_start;
3138
3139 return (WALK_NEXT);
3140 }
3141
3142 /*
3143 * vmem segments can't have type 0 (this should be added to vmem_impl.h).
3144 */
3145 #define VMEM_NONE 0
3146
3147 int
3148 vmem_alloc_walk_init(mdb_walk_state_t *wsp)
3149 {
3150 return (vmem_seg_walk_common_init(wsp, VMEM_ALLOC, "alloc"));
3151 }
3152
3153 int
3154 vmem_free_walk_init(mdb_walk_state_t *wsp)
3155 {
3156 return (vmem_seg_walk_common_init(wsp, VMEM_FREE, "free"));
3157 }
3158
3159 int
3160 vmem_span_walk_init(mdb_walk_state_t *wsp)
3161 {
3162 return (vmem_seg_walk_common_init(wsp, VMEM_SPAN, "span"));
3163 }
3164
3165 int
3166 vmem_seg_walk_init(mdb_walk_state_t *wsp)
3167 {
3168 return (vmem_seg_walk_common_init(wsp, VMEM_NONE, "seg"));
3169 }
3170
3171 int
3172 vmem_seg_walk_step(mdb_walk_state_t *wsp)
3173 {
3174 vmem_seg_t seg;
3175 vmem_seg_walk_t *vsw = wsp->walk_data;
3176 uintptr_t addr = vsw->vsw_current;
3177 static size_t seg_size = 0;
3178 int rval;
3179
3180 if (!seg_size) {
3181 if (umem_readvar(&seg_size, "vmem_seg_size") == -1) {
3182 mdb_warn("failed to read 'vmem_seg_size'");
3183 seg_size = sizeof (vmem_seg_t);
3184 }
3185 }
3186
3187 if (seg_size < sizeof (seg))
3188 bzero((caddr_t)&seg + seg_size, sizeof (seg) - seg_size);
3189
3190 if (mdb_vread(&seg, seg_size, addr) == -1) {
3191 mdb_warn("couldn't read vmem_seg at %p", addr);
3192 return (WALK_ERR);
3193 }
3194
3195 vsw->vsw_current = (uintptr_t)seg.vs_anext;
3196 if (vsw->vsw_type != VMEM_NONE && seg.vs_type != vsw->vsw_type) {
3197 rval = WALK_NEXT;
3198 } else {
3199 rval = wsp->walk_callback(addr, &seg, wsp->walk_cbdata);
3200 }
3201
3202 if (vsw->vsw_current == vsw->vsw_start)
3203 return (WALK_DONE);
3204
3205 return (rval);
3206 }
3207
3208 void
3209 vmem_seg_walk_fini(mdb_walk_state_t *wsp)
3210 {
3211 vmem_seg_walk_t *vsw = wsp->walk_data;
3212
3213 mdb_free(vsw, sizeof (vmem_seg_walk_t));
3214 }
3215
3216 #define VMEM_NAMEWIDTH 22
3217
3218 int
3219 vmem(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3220 {
3221 vmem_t v, parent;
3222 uintptr_t paddr;
3223 int ident = 0;
3224 char c[VMEM_NAMEWIDTH];
3225
3226 if (!(flags & DCMD_ADDRSPEC)) {
3227 if (mdb_walk_dcmd("vmem", "vmem", argc, argv) == -1) {
3228 mdb_warn("can't walk vmem");
3229 return (DCMD_ERR);
3230 }
3231 return (DCMD_OK);
3232 }
3233
3234 if (DCMD_HDRSPEC(flags))
3235 mdb_printf("%-?s %-*s %10s %12s %9s %5s\n",
3236 "ADDR", VMEM_NAMEWIDTH, "NAME", "INUSE",
3237 "TOTAL", "SUCCEED", "FAIL");
3238
3239 if (mdb_vread(&v, sizeof (v), addr) == -1) {
3240 mdb_warn("couldn't read vmem at %p", addr);
3241 return (DCMD_ERR);
3242 }
3243
3244 for (paddr = (uintptr_t)v.vm_source; paddr != NULL; ident += 2) {
3245 if (mdb_vread(&parent, sizeof (parent), paddr) == -1) {
3246 mdb_warn("couldn't trace %p's ancestry", addr);
3247 ident = 0;
3248 break;
3249 }
3250 paddr = (uintptr_t)parent.vm_source;
3251 }
3252
3253 (void) mdb_snprintf(c, VMEM_NAMEWIDTH, "%*s%s", ident, "", v.vm_name);
3254
3255 mdb_printf("%0?p %-*s %10llu %12llu %9llu %5llu\n",
3256 addr, VMEM_NAMEWIDTH, c,
3257 v.vm_kstat.vk_mem_inuse, v.vm_kstat.vk_mem_total,
3258 v.vm_kstat.vk_alloc, v.vm_kstat.vk_fail);
3259
3260 return (DCMD_OK);
3261 }
3262
3263 void
3264 vmem_seg_help(void)
3265 {
3266 mdb_printf("%s\n",
3267 "Display the contents of vmem_seg_ts, with optional filtering.\n"
3268 "\n"
3269 "A vmem_seg_t represents a range of addresses (or arbitrary numbers),\n"
3270 "representing a single chunk of data. Only ALLOC segments have debugging\n"
3271 "information.\n");
3272 mdb_dec_indent(2);
3273 mdb_printf("%<b>OPTIONS%</b>\n");
3274 mdb_inc_indent(2);
3275 mdb_printf("%s",
3276 " -v Display the full content of the vmem_seg, including its stack trace\n"
3277 " -s report the size of the segment, instead of the end address\n"
3278 " -c caller\n"
3279 " filter out segments without the function/PC in their stack trace\n"
3280 " -e earliest\n"
3281 " filter out segments timestamped before earliest\n"
3282 " -l latest\n"
3283 " filter out segments timestamped after latest\n"
3284 " -m minsize\n"
3285 " filter out segments smaller than minsize\n"
3286 " -M maxsize\n"
3287 " filter out segments larger than maxsize\n"
3288 " -t thread\n"
3289 " filter out segments not involving thread\n"
3290 " -T type\n"
3291 " filter out segments not of type 'type'\n"
3292 " type is one of: ALLOC/FREE/SPAN/ROTOR/WALKER\n");
3293 }
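
/*
 * Illustrative pipelines (the arena address is hypothetical; "vmem_seg" and
 * "vmem_alloc" are assumed to be the walker names backed by the init
 * routines earlier in this file):
 *
 *	> 0x8062c10::walk vmem_seg | ::vmem_seg -T SPAN
 *	> 0x8062c10::walk vmem_alloc | ::vmem_seg -s -m 0x1000
 */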
3294
3295
3296 /*ARGSUSED*/
3297 int
3298 vmem_seg(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3299 {
3300 vmem_seg_t vs;
3301 uintptr_t *stk = vs.vs_stack;
3302 uintptr_t sz;
3303 uint8_t t;
3304 const char *type = NULL;
3305 GElf_Sym sym;
3306 char c[MDB_SYM_NAMLEN];
3307 int no_debug;
3308 int i;
3309 int depth;
3310 uintptr_t laddr, haddr;
3311
3312 uintptr_t caller = NULL, thread = NULL;
3313 uintptr_t minsize = 0, maxsize = 0;
3314
3315 hrtime_t earliest = 0, latest = 0;
3316
3317 uint_t size = 0;
3318 uint_t verbose = 0;
3319
3320 if (!(flags & DCMD_ADDRSPEC))
3321 return (DCMD_USAGE);
3322
3323 if (mdb_getopts(argc, argv,
3324 'c', MDB_OPT_UINTPTR, &caller,
3325 'e', MDB_OPT_UINT64, &earliest,
3326 'l', MDB_OPT_UINT64, &latest,
3327 's', MDB_OPT_SETBITS, TRUE, &size,
3328 'm', MDB_OPT_UINTPTR, &minsize,
3329 'M', MDB_OPT_UINTPTR, &maxsize,
3330 't', MDB_OPT_UINTPTR, &thread,
3331 'T', MDB_OPT_STR, &type,
3332 'v', MDB_OPT_SETBITS, TRUE, &verbose,
3333 NULL) != argc)
3334 return (DCMD_USAGE);
3335
3336 if (DCMD_HDRSPEC(flags) && !(flags & DCMD_PIPE_OUT)) {
3337 if (verbose) {
3338 mdb_printf("%16s %4s %16s %16s %16s\n"
3339 "%<u>%16s %4s %16s %16s %16s%</u>\n",
3340 "ADDR", "TYPE", "START", "END", "SIZE",
3341 "", "", "THREAD", "TIMESTAMP", "");
3342 } else {
3343 mdb_printf("%?s %4s %?s %?s %s\n", "ADDR", "TYPE",
3344 "START", size? "SIZE" : "END", "WHO");
3345 }
3346 }
3347
3348 if (mdb_vread(&vs, sizeof (vs), addr) == -1) {
3349 mdb_warn("couldn't read vmem_seg at %p", addr);
3350 return (DCMD_ERR);
3351 }
3352
3353 if (type != NULL) {
3354 if (strcmp(type, "ALLC") == 0 || strcmp(type, "ALLOC") == 0)
3355 t = VMEM_ALLOC;
3356 else if (strcmp(type, "FREE") == 0)
3357 t = VMEM_FREE;
3358 else if (strcmp(type, "SPAN") == 0)
3359 t = VMEM_SPAN;
3360 else if (strcmp(type, "ROTR") == 0 ||
3361 strcmp(type, "ROTOR") == 0)
3362 t = VMEM_ROTOR;
3363 else if (strcmp(type, "WLKR") == 0 ||
3364 strcmp(type, "WALKER") == 0)
3365 t = VMEM_WALKER;
3366 else {
3367 mdb_warn("\"%s\" is not a recognized vmem_seg type\n",
3368 type);
3369 return (DCMD_ERR);
3370 }
3371
3372 if (vs.vs_type != t)
3373 return (DCMD_OK);
3374 }
3375
3376 sz = vs.vs_end - vs.vs_start;
3377
3378 if (minsize != 0 && sz < minsize)
3379 return (DCMD_OK);
3380
3381 if (maxsize != 0 && sz > maxsize)
3382 return (DCMD_OK);
3383
3384 t = vs.vs_type;
3385 depth = vs.vs_depth;
3386
3387 /*
3388 * debug info, when present, is only accurate for VMEM_ALLOC segments
3389 */
3390 no_debug = (t != VMEM_ALLOC) ||
3391 (depth == 0 || depth > VMEM_STACK_DEPTH);
3392
3393 if (no_debug) {
3394 if (caller != NULL || thread != NULL || earliest != 0 ||
3395 latest != 0)
3396 return (DCMD_OK); /* not enough info */
3397 } else {
3398 if (caller != NULL) {
3399 laddr = caller;
3400 haddr = caller + sizeof (caller);
3401
3402 if (mdb_lookup_by_addr(caller, MDB_SYM_FUZZY, c,
3403 sizeof (c), &sym) != -1 &&
3404 caller == (uintptr_t)sym.st_value) {
3405 /*
3406 * We were provided an exact symbol value; any
3407 * address in the function is valid.
3408 */
3409 laddr = (uintptr_t)sym.st_value;
3410 haddr = (uintptr_t)sym.st_value + sym.st_size;
3411 }
3412
3413 for (i = 0; i < depth; i++)
3414 if (vs.vs_stack[i] >= laddr &&
3415 vs.vs_stack[i] < haddr)
3416 break;
3417
3418 if (i == depth)
3419 return (DCMD_OK);
3420 }
3421
3422 if (thread != NULL && (uintptr_t)vs.vs_thread != thread)
3423 return (DCMD_OK);
3424
3425 if (earliest != 0 && vs.vs_timestamp < earliest)
3426 return (DCMD_OK);
3427
3428 if (latest != 0 && vs.vs_timestamp > latest)
3429 return (DCMD_OK);
3430 }
3431
3432 type = (t == VMEM_ALLOC ? "ALLC" :
3433 t == VMEM_FREE ? "FREE" :
3434 t == VMEM_SPAN ? "SPAN" :
3435 t == VMEM_ROTOR ? "ROTR" :
3436 t == VMEM_WALKER ? "WLKR" :
3437 "????");
3438
3439 if (flags & DCMD_PIPE_OUT) {
3440 mdb_printf("%#r\n", addr);
3441 return (DCMD_OK);
3442 }
3443
3444 if (verbose) {
3445 mdb_printf("%<b>%16p%</b> %4s %16p %16p %16d\n",
3446 addr, type, vs.vs_start, vs.vs_end, sz);
3447
3448 if (no_debug)
3449 return (DCMD_OK);
3450
3451 mdb_printf("%16s %4s %16d %16llx\n",
3452 "", "", vs.vs_thread, vs.vs_timestamp);
3453
3454 mdb_inc_indent(17);
3455 for (i = 0; i < depth; i++) {
3456 mdb_printf("%a\n", stk[i]);
3457 }
3458 mdb_dec_indent(17);
3459 mdb_printf("\n");
3460 } else {
3461 mdb_printf("%0?p %4s %0?p %0?p", addr, type,
3462 vs.vs_start, size? sz : vs.vs_end);
3463
3464 if (no_debug) {
3465 mdb_printf("\n");
3466 return (DCMD_OK);
3467 }
3468
3469 for (i = 0; i < depth; i++) {
3470 if (mdb_lookup_by_addr(stk[i], MDB_SYM_FUZZY,
3471 c, sizeof (c), &sym) == -1)
3472 continue;
3473 if (is_umem_sym(c, "vmem_"))
3474 continue;
3475 break;
3476 }
3477 mdb_printf(" %a\n", stk[i]);
3478 }
3479 return (DCMD_OK);
3480 }
3481
3482 /*ARGSUSED*/
3483 static int
3484 showbc(uintptr_t addr, const umem_bufctl_audit_t *bcp, hrtime_t *newest)
3485 {
3486 char name[UMEM_CACHE_NAMELEN + 1];
3487 hrtime_t delta;
3488 int i, depth;
3489
3490 if (bcp->bc_timestamp == 0)
3491 return (WALK_DONE);
3492
3493 if (*newest == 0)
3494 *newest = bcp->bc_timestamp;
3495
3496 delta = *newest - bcp->bc_timestamp;
3497 depth = MIN(bcp->bc_depth, umem_stack_depth);
3498
3499 if (mdb_readstr(name, sizeof (name), (uintptr_t)
3500 &bcp->bc_cache->cache_name) <= 0)
3501 (void) mdb_snprintf(name, sizeof (name), "%a", bcp->bc_cache);
3502
3503 mdb_printf("\nT-%lld.%09lld addr=%p %s\n",
3504 delta / NANOSEC, delta % NANOSEC, bcp->bc_addr, name);
3505
3506 for (i = 0; i < depth; i++)
3507 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3508
3509 return (WALK_NEXT);
3510 }
3511
3512 int
3513 umalog(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3514 {
3515 const char *logname = "umem_transaction_log";
3516 hrtime_t newest = 0;
3517
3518 if ((flags & DCMD_ADDRSPEC) || argc > 1)
3519 return (DCMD_USAGE);
3520
3521 if (argc > 0) {
3522 if (argv->a_type != MDB_TYPE_STRING)
3523 return (DCMD_USAGE);
3524 if (strcmp(argv->a_un.a_str, "fail") == 0)
3525 logname = "umem_failure_log";
3526 else if (strcmp(argv->a_un.a_str, "slab") == 0)
3527 logname = "umem_slab_log";
3528 else
3529 return (DCMD_USAGE);
3530 }
3531
3532 if (umem_readvar(&addr, logname) == -1) {
3533 mdb_warn("failed to read %s log header pointer", logname);
3534 return (DCMD_ERR);
3535 }
3536
3537 if (mdb_pwalk("umem_log", (mdb_walk_cb_t)showbc, &newest, addr) == -1) {
3538 mdb_warn("failed to walk umem log");
3539 return (DCMD_ERR);
3540 }
3541
3542 return (DCMD_OK);
3543 }
3544
3545 /*
3546 * As the final lure for die-hard crash(1M) users, we provide ::umausers here.
3547 * The first piece is a structure which we use to accumulate umem_cache_t
3548 * addresses of interest. The umc_add is used as a callback for the umem_cache
3549 * walker; we either add all caches, or ones named explicitly as arguments.
3550 */
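
/*
 * Illustrative invocations (the cache name is only an example of a
 * umem_alloc_<size> cache):
 *
 *	> ::umausers			! all audited caches, large users only
 *	> ::umausers -e			! include small users as well
 *	> ::umausers -f umem_alloc_256	! per-bufctl detail for one cache
 */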
3551
3552 typedef struct umclist {
3553 const char *umc_name; /* Name to match (or NULL) */
3554 uintptr_t *umc_caches; /* List of umem_cache_t addrs */
3555 int umc_nelems; /* Num entries in umc_caches */
3556 int umc_size; /* Size of umc_caches array */
3557 } umclist_t;
3558
3559 static int
3560 umc_add(uintptr_t addr, const umem_cache_t *cp, umclist_t *umc)
3561 {
3562 void *p;
3563 int s;
3564
3565 if (umc->umc_name == NULL ||
3566 strcmp(cp->cache_name, umc->umc_name) == 0) {
3567 /*
3568 * If we have a match, grow our array (if necessary), and then
3569 * add the virtual address of the matching cache to our list.
3570 */
3571 if (umc->umc_nelems >= umc->umc_size) {
3572 s = umc->umc_size ? umc->umc_size * 2 : 256;
3573 p = mdb_alloc(sizeof (uintptr_t) * s, UM_SLEEP | UM_GC);
3574
3575 bcopy(umc->umc_caches, p,
3576 sizeof (uintptr_t) * umc->umc_size);
3577
3578 umc->umc_caches = p;
3579 umc->umc_size = s;
3580 }
3581
3582 umc->umc_caches[umc->umc_nelems++] = addr;
3583 return (umc->umc_name ? WALK_DONE : WALK_NEXT);
3584 }
3585
3586 return (WALK_NEXT);
3587 }
3588
3589 /*
3590 * The second piece of ::umausers is a hash table of allocations. Each
3591 * allocation owner is identified by its stack trace and data_size. We then
3592 * track the total bytes of all such allocations, and the number of allocations
3593 * to report at the end. Once we have a list of caches, we walk through the
3594 * allocated bufctls of each, and update our hash table accordingly.
3595 */
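
/*
 * In other words (a sketch of the identity test implemented by umu_add()
 * below): two allocations are charged to the same umowner_t only if their
 * data_size, stack depth, and every stack PC match. The hash signature is
 * data_size plus the sum of the stack PCs, and since the table size is
 * always a power of two, the bucket is simply (signature & (size - 1)).
 */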
3596
3597 typedef struct umowner {
3598 struct umowner *umo_head; /* First hash elt in bucket */
3599 struct umowner *umo_next; /* Next hash elt in chain */
3600 size_t umo_signature; /* Hash table signature */
3601 uint_t umo_num; /* Number of allocations */
3602 size_t umo_data_size; /* Size of each allocation */
3603 size_t umo_total_size; /* Total bytes of allocation */
3604 int umo_depth; /* Depth of stack trace */
3605 uintptr_t *umo_stack; /* Stack trace */
3606 } umowner_t;
3607
3608 typedef struct umusers {
3609 const umem_cache_t *umu_cache; /* Current umem cache */
3610 umowner_t *umu_hash; /* Hash table of owners */
3611 uintptr_t *umu_stacks; /* stacks for owners */
3612 int umu_nelems; /* Number of entries in use */
3613 int umu_size; /* Total number of entries */
3614 } umusers_t;
3615
3616 static void
3617 umu_add(umusers_t *umu, const umem_bufctl_audit_t *bcp,
3618 size_t size, size_t data_size)
3619 {
3620 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3621 size_t bucket, signature = data_size;
3622 umowner_t *umo, *umoend;
3623
3624 /*
3625 * If the hash table is full, double its size and rehash everything.
3626 */
3627 if (umu->umu_nelems >= umu->umu_size) {
3628 int s = umu->umu_size ? umu->umu_size * 2 : 1024;
3629 size_t umowner_size = sizeof (umowner_t);
3630 size_t trace_size = umem_stack_depth * sizeof (uintptr_t);
3631 uintptr_t *new_stacks;
3632
3633 umo = mdb_alloc(umowner_size * s, UM_SLEEP | UM_GC);
3634 new_stacks = mdb_alloc(trace_size * s, UM_SLEEP | UM_GC);
3635
3636 bcopy(umu->umu_hash, umo, umowner_size * umu->umu_size);
3637 bcopy(umu->umu_stacks, new_stacks, trace_size * umu->umu_size);
3638 umu->umu_hash = umo;
3639 umu->umu_stacks = new_stacks;
3640 umu->umu_size = s;
3641
3642 umoend = umu->umu_hash + umu->umu_size;
3643 for (umo = umu->umu_hash; umo < umoend; umo++) {
3644 umo->umo_head = NULL;
3645 umo->umo_stack = &umu->umu_stacks[
3646 umem_stack_depth * (umo - umu->umu_hash)];
3647 }
3648
3649 umoend = umu->umu_hash + umu->umu_nelems;
3650 for (umo = umu->umu_hash; umo < umoend; umo++) {
3651 bucket = umo->umo_signature & (umu->umu_size - 1);
3652 umo->umo_next = umu->umu_hash[bucket].umo_head;
3653 umu->umu_hash[bucket].umo_head = umo;
3654 }
3655 }
3656
3657 /*
3658 * Finish computing the hash signature from the stack trace, and then
3659 * see if the owner is in the hash table. If so, update our stats.
3660 */
3661 for (i = 0; i < depth; i++)
3662 signature += bcp->bc_stack[i];
3663
3664 bucket = signature & (umu->umu_size - 1);
3665
3666 for (umo = umu->umu_hash[bucket].umo_head; umo; umo = umo->umo_next) {
3667 if (umo->umo_signature == signature) {
3668 size_t difference = 0;
3669
3670 difference |= umo->umo_data_size - data_size;
3671 difference |= umo->umo_depth - depth;
3672
3673 for (i = 0; i < depth; i++) {
3674 difference |= umo->umo_stack[i] -
3675 bcp->bc_stack[i];
3676 }
3677
3678 if (difference == 0) {
3679 umo->umo_total_size += size;
3680 umo->umo_num++;
3681 return;
3682 }
3683 }
3684 }
3685
3686 /*
3687 * If the owner is not yet hashed, grab the next element and fill it
3688 * in based on the allocation information.
3689 */
3690 umo = &umu->umu_hash[umu->umu_nelems++];
3691 umo->umo_next = umu->umu_hash[bucket].umo_head;
3692 umu->umu_hash[bucket].umo_head = umo;
3693
3694 umo->umo_signature = signature;
3695 umo->umo_num = 1;
3696 umo->umo_data_size = data_size;
3697 umo->umo_total_size = size;
3698 umo->umo_depth = depth;
3699
3700 for (i = 0; i < depth; i++)
3701 umo->umo_stack[i] = bcp->bc_stack[i];
3702 }
3703
3704 /*
3705 * When ::umausers is invoked without the -f flag, we simply update our hash
3706 * table with the information from each allocated bufctl.
3707 */
3708 /*ARGSUSED*/
3709 static int
3710 umause1(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3711 {
3712 const umem_cache_t *cp = umu->umu_cache;
3713
3714 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3715 return (WALK_NEXT);
3716 }
3717
3718 /*
3719 * When ::umausers is invoked with the -f flag, we print out the information
3720 * for each bufctl as well as updating the hash table.
3721 */
3722 static int
3723 umause2(uintptr_t addr, const umem_bufctl_audit_t *bcp, umusers_t *umu)
3724 {
3725 int i, depth = MIN(bcp->bc_depth, umem_stack_depth);
3726 const umem_cache_t *cp = umu->umu_cache;
3727
3728 mdb_printf("size %d, addr %p, thread %p, cache %s\n",
3729 cp->cache_bufsize, addr, bcp->bc_thread, cp->cache_name);
3730
3731 for (i = 0; i < depth; i++)
3732 mdb_printf("\t %a\n", bcp->bc_stack[i]);
3733
3734 umu_add(umu, bcp, cp->cache_bufsize, cp->cache_bufsize);
3735 return (WALK_NEXT);
3736 }
3737
3738 /*
3739 * We sort our results by allocation size before printing them.
3740 */
3741 static int
3742 umownercmp(const void *lp, const void *rp)
3743 {
3744 const umowner_t *lhs = lp;
3745 const umowner_t *rhs = rp;
3746
3747 return (rhs->umo_total_size - lhs->umo_total_size);
3748 }
3749
3750 /*
3751 * The main engine of ::umausers is relatively straightforward: First we
3752 * accumulate our list of umem_cache_t addresses into the umclist_t. Next we
3753 * iterate over the allocated bufctls of each cache in the list. Finally,
3754 * we sort and print our results.
3755 */
3756 /*ARGSUSED*/
3757 int
3758 umausers(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
3759 {
3760 int mem_threshold = 8192; /* Minimum # bytes for printing */
3761 int cnt_threshold = 100; /* Minimum # blocks for printing */
3762 int audited_caches = 0; /* Number of UMF_AUDIT caches found */
3763 int do_all_caches = 1; /* Do all caches (no arguments) */
3764 int opt_e = FALSE; /* Include "small" users */
3765 int opt_f = FALSE; /* Print stack traces */
3766
3767 mdb_walk_cb_t callback = (mdb_walk_cb_t)umause1;
3768 umowner_t *umo, *umoend;
3769 int i, oelems;
3770
3771 umclist_t umc;
3772 umusers_t umu;
3773
3774 if (flags & DCMD_ADDRSPEC)
3775 return (DCMD_USAGE);
3776
3777 bzero(&umc, sizeof (umc));
3778 bzero(&umu, sizeof (umu));
3779
3780 while ((i = mdb_getopts(argc, argv,
3781 'e', MDB_OPT_SETBITS, TRUE, &opt_e,
3782 'f', MDB_OPT_SETBITS, TRUE, &opt_f, NULL)) != argc) {
3783
3784 argv += i; /* skip past options we just processed */
3785 argc -= i; /* adjust argc */
3786
3787 if (argv->a_type != MDB_TYPE_STRING || *argv->a_un.a_str == '-')
3788 return (DCMD_USAGE);
3789
3790 oelems = umc.umc_nelems;
3791 umc.umc_name = argv->a_un.a_str;
3792 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3793
3794 if (umc.umc_nelems == oelems) {
3795 mdb_warn("unknown umem cache: %s\n", umc.umc_name);
3796 return (DCMD_ERR);
3797 }
3798
3799 do_all_caches = 0;
3800 argv++;
3801 argc--;
3802 }
3803
3804 if (opt_e)
3805 mem_threshold = cnt_threshold = 0;
3806
3807 if (opt_f)
3808 callback = (mdb_walk_cb_t)umause2;
3809
3810 if (do_all_caches) {
3811 umc.umc_name = NULL; /* match all cache names */
3812 (void) mdb_walk("umem_cache", (mdb_walk_cb_t)umc_add, &umc);
3813 }
3814
3815 for (i = 0; i < umc.umc_nelems; i++) {
3816 uintptr_t cp = umc.umc_caches[i];
3817 umem_cache_t c;
3818
3819 if (mdb_vread(&c, sizeof (c), cp) == -1) {
3820 mdb_warn("failed to read cache at %p", cp);
3821 continue;
3822 }
3823
3824 if (!(c.cache_flags & UMF_AUDIT)) {
3825 if (!do_all_caches) {
3826 mdb_warn("UMF_AUDIT is not enabled for %s\n",
3827 c.cache_name);
3828 }
3829 continue;
3830 }
3831
3832 umu.umu_cache = &c;
3833 (void) mdb_pwalk("bufctl", callback, &umu, cp);
3834 audited_caches++;
3835 }
3836
3837 if (audited_caches == 0 && do_all_caches) {
3838 mdb_warn("UMF_AUDIT is not enabled for any caches\n");
3839 return (DCMD_ERR);
3840 }
3841
3842 qsort(umu.umu_hash, umu.umu_nelems, sizeof (umowner_t), umownercmp);
3843 umoend = umu.umu_hash + umu.umu_nelems;
3844
3845 for (umo = umu.umu_hash; umo < umoend; umo++) {
3846 if (umo->umo_total_size < mem_threshold &&
3847 umo->umo_num < cnt_threshold)
3848 continue;
3849 mdb_printf("%lu bytes for %u allocations with data size %lu:\n",
3850 umo->umo_total_size, umo->umo_num, umo->umo_data_size);
3851 for (i = 0; i < umo->umo_depth; i++)
3852 mdb_printf("\t %a\n", umo->umo_stack[i]);
3853 }
3854
3855 return (DCMD_OK);
3856 }
3857
3858 struct malloc_data {
3859 uint32_t malloc_size;
3860 uint32_t malloc_stat; /* == UMEM_MALLOC_ENCODE(state, malloc_size) */
3861 };
3862
3863 #ifdef _LP64
3864 #define UMI_MAX_BUCKET (UMEM_MAXBUF - 2*sizeof (struct malloc_data))
3865 #else
3866 #define UMI_MAX_BUCKET (UMEM_MAXBUF - sizeof (struct malloc_data))
3867 #endif
3868
3869 typedef struct umem_malloc_info {
3870 size_t um_total; /* total allocated buffers */
3871 size_t um_malloc; /* malloc buffers */
3872 size_t um_malloc_size; /* sum of malloc buffer sizes */
3873 size_t um_malloc_overhead; /* sum of in-chunk overheads */
3874
3875 umem_cache_t *um_cp;
3876
3877 uint_t *um_bucket;
3878 } umem_malloc_info_t;
3879
3880 static void
3881 umem_malloc_print_dist(uint_t *um_bucket, size_t minmalloc, size_t maxmalloc,
3882 size_t maxbuckets, size_t minbucketsize, int geometric)
3883 {
3884 uint64_t um_malloc;
3885 int minb = -1;
3886 int maxb = -1;
3887 int buckets;
3888 int nbucks;
3889 int i;
3890 int b;
3891 const int *distarray;
3892
3893 minb = (int)minmalloc;
3894 maxb = (int)maxmalloc;
3895
3896 nbucks = buckets = maxb - minb + 1;
3897
3898 um_malloc = 0;
3899 for (b = minb; b <= maxb; b++)
3900 um_malloc += um_bucket[b];
3901
3902 if (maxbuckets != 0)
3903 buckets = MIN(buckets, maxbuckets);
3904
3905 if (minbucketsize > 1) {
3906 buckets = MIN(buckets, nbucks/minbucketsize);
3907 if (buckets == 0) {
3908 buckets = 1;
3909 minbucketsize = nbucks;
3910 }
3911 }
3912
3913 if (geometric)
3914 distarray = dist_geometric(buckets, minb, maxb, minbucketsize);
3915 else
3916 distarray = dist_linear(buckets, minb, maxb);
3917
3918 dist_print_header("malloc size", 11, "count");
3919 for (i = 0; i < buckets; i++) {
3920 dist_print_bucket(distarray, i, um_bucket, um_malloc, 11);
3921 }
3922 mdb_printf("\n");
3923 }

/*
 * A malloc()ed buffer looks like:
 *
 *	<----------- mi.malloc_size --->
 *	<----------- cp.cache_bufsize ------------------>
 *	<----------- cp.cache_chunksize -------------------------------->
 *	+-------+-----------------------+---------------+---------------+
 *	|/tag///| mallocsz              |/round-off/////|/debug info////|
 *	+-------+---------------------------------------+---------------+
 *		<-- usable space ------>
 *
 * mallocsz is the argument to malloc(3C).
 * mi.malloc_size is the actual size passed to umem_alloc(), which
 * is rounded up to the smallest available cache size, which is
 * cache_bufsize.  If there is debugging or alignment overhead in
 * the cache, that is reflected in a larger cache_chunksize.
 *
 * The tag at the beginning of the buffer is either 8 bytes or 16 bytes,
 * depending upon the ISA's alignment requirements.  For 32-bit allocations,
 * it is always an 8-byte tag.  For 64-bit allocations larger than 8 bytes,
 * the tag has 8 bytes of padding before it.
 *
 * 32-bit allocations, and 64-bit allocations <= 8 bytes:
 *	+-------+-------+--------- ...
 *	|/size//|/stat//| mallocsz ...
 *	+-------+-------+--------- ...
 *			^
 *			pointer returned from malloc(3C)
 *
 * 64-bit allocations > 8 bytes:
 *	+---------------+-------+-------+--------- ...
 *	|/padding///////|/size//|/stat//| mallocsz ...
 *	+---------------+-------+-------+--------- ...
 *					^
 *					pointer returned from malloc(3C)
 *
 * The "size" field is "malloc_size", which is mallocsz + the padding.
 * The "stat" field is derived from malloc_size, and functions as a
 * validation that this buffer is actually from malloc(3C).
 */
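/*
 * An illustrative example (assuming the default umem_alloc_sizes table):
 * a 100-byte malloc(3C) in a 32-bit process carries an 8-byte tag, so
 * malloc_size is 108, which is rounded up to the umem_alloc_112 cache
 * (cache_bufsize == 112).  That leaves 4 bytes of round-off; any debug
 * metadata lives beyond cache_bufsize, in cache_chunksize - cache_bufsize.
 */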
/*ARGSUSED*/
static int
um_umem_buffer_cb(uintptr_t addr, void *buf, umem_malloc_info_t *ump)
{
	struct malloc_data md;
	size_t m_addr = addr;
	size_t overhead = sizeof (md);
	size_t mallocsz;

	ump->um_total++;

#ifdef _LP64
	if (ump->um_cp->cache_bufsize > UMEM_SECOND_ALIGN) {
		m_addr += overhead;
		overhead += sizeof (md);
	}
#endif

	if (mdb_vread(&md, sizeof (md), m_addr) == -1) {
		mdb_warn("unable to read malloc header at %p", m_addr);
		return (WALK_NEXT);
	}

	switch (UMEM_MALLOC_DECODE(md.malloc_stat, md.malloc_size)) {
	case MALLOC_MAGIC:
#ifdef _LP64
	case MALLOC_SECOND_MAGIC:
#endif
		mallocsz = md.malloc_size - overhead;

		ump->um_malloc++;
		ump->um_malloc_size += mallocsz;
		ump->um_malloc_overhead += overhead;

		/* include round-off and debug overhead */
		ump->um_malloc_overhead +=
		    ump->um_cp->cache_chunksize - md.malloc_size;

		if (ump->um_bucket != NULL && mallocsz <= UMI_MAX_BUCKET)
			ump->um_bucket[mallocsz]++;

		break;
	default:
		break;
	}

	return (WALK_NEXT);
}

int
get_umem_alloc_sizes(int **out, size_t *out_num)
{
	GElf_Sym sym;

	if (umem_lookup_by_name("umem_alloc_sizes", &sym) == -1) {
		mdb_warn("unable to look up umem_alloc_sizes");
		return (-1);
	}

	*out = mdb_alloc(sym.st_size, UM_SLEEP | UM_GC);
	*out_num = sym.st_size / sizeof (int);

	if (mdb_vread(*out, sym.st_size, sym.st_value) == -1) {
		mdb_warn("unable to read umem_alloc_sizes (%p)", sym.st_value);
		*out = NULL;
		return (-1);
	}

	return (0);
}

static int
um_umem_cache_cb(uintptr_t addr, umem_cache_t *cp, umem_malloc_info_t *ump)
{
	if (strncmp(cp->cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0)
		return (WALK_NEXT);

	ump->um_cp = cp;

	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, ump, addr) ==
	    -1) {
		mdb_warn("can't walk 'umem' for cache %p", addr);
		return (WALK_ERR);
	}

	return (WALK_NEXT);
}

void
umem_malloc_dist_help(void)
{
	mdb_printf("%s\n",
	    "report distribution of outstanding malloc()s");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
	    " -b maxbins\n"
	    " Use at most maxbins bins for the data\n"
	    " -B minbinsize\n"
	    " Make the bins at least minbinsize bytes apart\n"
	    " -d dump the raw data out, without binning\n"
	    " -g use geometric binning instead of linear binning\n");
}
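
/*
 * Illustrative usage: "::umem_malloc_dist -g -b 16" prints the distribution
 * of outstanding malloc(3C) request sizes using at most 16 geometrically
 * spaced bins.
 */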

/*ARGSUSED*/
int
umem_malloc_dist(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	umem_malloc_info_t mi;
	uint_t geometric = 0;
	uint_t dump = 0;
	size_t maxbuckets = 0;
	size_t minbucketsize = 0;

	size_t minalloc = 0;
	size_t maxalloc = UMI_MAX_BUCKET;

	if (flags & DCMD_ADDRSPEC)
		return (DCMD_USAGE);

	if (mdb_getopts(argc, argv,
	    'd', MDB_OPT_SETBITS, TRUE, &dump,
	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    0) != argc)
		return (DCMD_USAGE);

	bzero(&mi, sizeof (mi));
	mi.um_bucket = mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
	    UM_SLEEP | UM_GC);

	if (mdb_walk("umem_cache", (mdb_walk_cb_t)um_umem_cache_cb,
	    &mi) == -1) {
		mdb_warn("unable to walk 'umem_cache'");
		return (DCMD_ERR);
	}

	if (dump) {
		int i;
		for (i = minalloc; i <= maxalloc; i++)
			mdb_printf("%d\t%d\n", i, mi.um_bucket[i]);

		return (DCMD_OK);
	}

	umem_malloc_print_dist(mi.um_bucket, minalloc, maxalloc,
	    maxbuckets, minbucketsize, geometric);

	return (DCMD_OK);
}

void
umem_malloc_info_help(void)
{
	mdb_printf("%s\n",
	    "report information about malloc()s by cache");
	mdb_dec_indent(2);
	mdb_printf("%<b>OPTIONS%</b>\n");
	mdb_inc_indent(2);
	mdb_printf("%s",
	    " -b maxbins\n"
	    " Use at most maxbins bins for the data\n"
	    " -B minbinsize\n"
	    " Make the bins at least minbinsize bytes apart\n"
	    " -d dump the raw distribution data without binning\n"
#ifndef _KMDB
	    " -g use geometric binning instead of linear binning\n"
#endif
	    "");
}
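
/*
 * Illustrative usage: "::umem_malloc_info" prints one summary line per
 * umem_alloc_* cache; "<cache addr>::umem_malloc_info -g" additionally
 * prints a geometrically binned size distribution for that cache.
 */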
int
umem_malloc_info(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv)
{
	umem_cache_t c;
	umem_malloc_info_t mi;

	int skip = 0;

	size_t maxmalloc;
	size_t overhead;
	size_t allocated;
	size_t avg_malloc;
	size_t overhead_pct;	/* 10 * overhead percentage, one decimal */

	uint_t verbose = 0;
	uint_t dump = 0;
	uint_t geometric = 0;
	size_t maxbuckets = 0;
	size_t minbucketsize = 0;

	int *alloc_sizes;
	int idx;
	size_t num;
	size_t minmalloc;

	if (mdb_getopts(argc, argv,
	    'd', MDB_OPT_SETBITS, TRUE, &dump,
	    'g', MDB_OPT_SETBITS, TRUE, &geometric,
	    'b', MDB_OPT_UINTPTR, &maxbuckets,
	    'B', MDB_OPT_UINTPTR, &minbucketsize,
	    0) != argc)
		return (DCMD_USAGE);

	if (dump || geometric || (maxbuckets != 0) || (minbucketsize != 0))
		verbose = 1;

	if (!(flags & DCMD_ADDRSPEC)) {
		if (mdb_walk_dcmd("umem_cache", "umem_malloc_info",
		    argc, argv) == -1) {
			mdb_warn("can't walk umem_cache");
			return (DCMD_ERR);
		}
		return (DCMD_OK);
	}

	if (mdb_vread(&c, sizeof (c), addr) == -1) {
		mdb_warn("unable to read cache at %p", addr);
		return (DCMD_ERR);
	}

	if (strncmp(c.cache_name, "umem_alloc_", strlen("umem_alloc_")) != 0) {
		if (!(flags & DCMD_LOOP))
			mdb_warn("umem_malloc_info: cache \"%s\" is not used "
			    "by malloc()\n", c.cache_name);
		skip = 1;
	}

	/*
	 * Normally, print the header only the first time.  In verbose mode,
	 * print the header on every non-skipped cache.
	 */
	if ((!verbose && DCMD_HDRSPEC(flags)) || (verbose && !skip))
		mdb_printf("%<ul>%-?s %6s %6s %8s %8s %10s %10s %6s%</ul>\n",
		    "CACHE", "BUFSZ", "MAXMAL",
		    "BUFMALLC", "AVG_MAL", "MALLOCED", "OVERHEAD", "%OVER");

	if (skip)
		return (DCMD_OK);

	maxmalloc = c.cache_bufsize - sizeof (struct malloc_data);
#ifdef _LP64
	if (c.cache_bufsize > UMEM_SECOND_ALIGN)
		maxmalloc -= sizeof (struct malloc_data);
#endif

	bzero(&mi, sizeof (mi));
	mi.um_cp = &c;
	if (verbose)
		mi.um_bucket =
		    mdb_zalloc((UMI_MAX_BUCKET + 1) * sizeof (*mi.um_bucket),
		    UM_SLEEP | UM_GC);

	if (mdb_pwalk("umem", (mdb_walk_cb_t)um_umem_buffer_cb, &mi, addr) ==
	    -1) {
		mdb_warn("can't walk 'umem'");
		return (DCMD_ERR);
	}

	overhead = mi.um_malloc_overhead;
	allocated = mi.um_malloc_size;

	/* do integer round off for the average */
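	/* e.g. 200 bytes over 3 buffers: (200 + 1) / 3 == 67 (vs. 66.7) */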
	if (mi.um_malloc != 0)
		avg_malloc = (allocated + (mi.um_malloc - 1)/2) / mi.um_malloc;
	else
		avg_malloc = 0;

	/*
	 * include per-slab overhead
	 *
	 * Each slab in a given cache is the same size, and has the same
	 * number of chunks in it; we read in the first slab on the
	 * slab list to get the number of chunks for all slabs.  To
	 * compute the per-slab overhead, we just subtract the chunk usage
	 * from the slabsize:
	 *
	 * +------------+-------+-------+ ... --+-------+-------+-------+
	 * |////////////|       |       | ...   |       |///////|///////|
	 * |////color///| chunk | chunk | ...   | chunk |/color/|/slab//|
	 * |////////////|       |       | ...   |       |///////|///////|
	 * +------------+-------+-------+ ... --+-------+-------+-------+
	 * |            \_______chunksize * chunks_____/                |
	 * \__________________________slabsize__________________________/
	 *
	 * For UMF_HASH caches, there is an additional source of overhead:
	 * the external umem_slab_t and per-chunk bufctl structures.  We
	 * include those in our per-slab overhead.
	 *
	 * Once we have a number for the per-slab overhead, we estimate
	 * the actual overhead by treating the malloc()ed buffers as if
	 * they were densely packed:
	 *
	 *	additional overhead = (# mallocs) * (per-slab) / (chunks);
	 *
	 * carefully ordering the multiply before the divide, to avoid
	 * round-off error.
	 */
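	/*
	 * Illustrative numbers only: a 4096-byte slab holding 31 chunks of
	 * 128 bytes leaves 4096 - 31 * 128 = 128 bytes of per-slab overhead;
	 * with 10 outstanding mallocs in the cache, this adds
	 * (128 * 10) / 31 = 41 bytes to the estimate.
	 */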
	if (mi.um_malloc != 0) {
		umem_slab_t slab;
		uintptr_t saddr = (uintptr_t)c.cache_nullslab.slab_next;

		if (mdb_vread(&slab, sizeof (slab), saddr) == -1) {
			mdb_warn("unable to read slab at %p\n", saddr);
		} else {
			long chunks = slab.slab_chunks;
			if (chunks != 0 && c.cache_chunksize != 0 &&
			    chunks <= c.cache_slabsize / c.cache_chunksize) {
				uintmax_t perslab =
				    c.cache_slabsize -
				    (c.cache_chunksize * chunks);

				if (c.cache_flags & UMF_HASH) {
					perslab += sizeof (umem_slab_t) +
					    chunks *
					    ((c.cache_flags & UMF_AUDIT) ?
					    sizeof (umem_bufctl_audit_t) :
					    sizeof (umem_bufctl_t));
				}
				overhead +=
				    (perslab * (uintmax_t)mi.um_malloc)/chunks;
			} else {
				mdb_warn("invalid #chunks (%ld) in slab %p\n",
				    chunks, saddr);
			}
		}
	}

	if (allocated != 0)
		overhead_pct = (1000ULL * overhead) / allocated;
	else
		overhead_pct = 0;

	mdb_printf("%0?p %6ld %6ld %8ld %8ld %10ld %10ld %3ld.%01ld%%\n",
	    addr, c.cache_bufsize, maxmalloc,
	    mi.um_malloc, avg_malloc, allocated, overhead,
	    overhead_pct / 10, overhead_pct % 10);

	if (!verbose)
		return (DCMD_OK);

	if (!dump)
		mdb_printf("\n");

	if (get_umem_alloc_sizes(&alloc_sizes, &num) == -1)
		return (DCMD_ERR);

	for (idx = 0; idx < num; idx++) {
		if (alloc_sizes[idx] == c.cache_bufsize)
			break;
		if (alloc_sizes[idx] == 0) {
			idx = num;	/* 0-terminated array */
			break;
		}
	}
	if (idx == num) {
		mdb_warn(
		    "cache %p's size (%ld) not in umem_alloc_sizes\n",
		    addr, c.cache_bufsize);
		return (DCMD_ERR);
	}

	minmalloc = (idx == 0) ? 0 : alloc_sizes[idx - 1];
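
	/*
	 * Convert the previous cache's bufsize into the smallest malloc(3C)
	 * request that could not have been satisfied from it (and so must
	 * land in this cache).  Illustrative example for a 32-bit process:
	 * if the previous cache is umem_alloc_96, requests of up to
	 * 96 - 8 = 88 bytes fit there, so this cache starts at 89 bytes.
	 */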
	if (minmalloc > 0) {
#ifdef _LP64
		if (minmalloc > UMEM_SECOND_ALIGN)
			minmalloc -= sizeof (struct malloc_data);
#endif
		minmalloc -= sizeof (struct malloc_data);
		minmalloc += 1;
	}

	if (dump) {
		for (idx = minmalloc; idx <= maxmalloc; idx++)
			mdb_printf("%d\t%d\n", idx, mi.um_bucket[idx]);
		mdb_printf("\n");
	} else {
		umem_malloc_print_dist(mi.um_bucket, minmalloc, maxmalloc,
		    maxbuckets, minbucketsize, geometric);
	}

	return (DCMD_OK);
}