xref: /freebsd/sys/kern/subr_stats.c (revision 7790c8c1996ad89a22b8bd194a230cf23ee67f4b)
1 /*-
2  * Copyright (c) 2014-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*
30  * Author: Lawrence Stewart <lstewart@netflix.com>
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/arb.h>
38 #include <sys/ctype.h>
39 #include <sys/errno.h>
40 #include <sys/hash.h>
41 #include <sys/limits.h>
42 #include <sys/malloc.h>
43 #include <sys/qmath.h>
44 #include <sys/sbuf.h>
45 #if defined(DIAGNOSTIC)
46 #include <sys/tree.h>
47 #endif
48 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
49 #include <sys/stddef.h>
50 #include <sys/stdint.h>
51 #include <sys/time.h>
52 
53 #ifdef _KERNEL
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/rwlock.h>
57 #include <sys/sysctl.h>
58 #include <sys/systm.h>
59 #else /* ! _KERNEL */
60 #include <pthread.h>
61 #include <stdbool.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #endif /* _KERNEL */
66 
67 struct voistatdata_voistate {
68 	/* Previous VOI value for diff calculation. */
69 	struct voistatdata_numeric prev;
70 };
71 
72 #define	VS_VSDVALID	0x0001	/* Stat's voistatdata updated at least once. */
73 struct voistat {
74 	int8_t		stype;		/* Type of stat e.g. VS_STYPE_SUM. */
75 	enum vsd_dtype	dtype : 8;	/* Data type of this stat's data. */
76 	uint16_t	data_off;	/* Blob offset for this stat's data. */
77 	uint16_t	dsz;		/* Size of stat's data. */
78 #define	VS_EBITS 8
79 	uint16_t	errs : VS_EBITS;/* Non-wrapping error count. */
80 	uint16_t	flags : 16 - VS_EBITS;
81 };
82 /* The voistat error count is capped to avoid wrapping. */
83 #define	VS_INCERRS(vs) do {						\
84 	if ((vs)->errs < (1U << VS_EBITS) - 1)				\
85 		(vs)->errs++;						\
86 } while (0)
87 
88 /*
89  * Ideas for flags:
90  *   - Global or entity specific (global would imply use of counter(9)?)
91  *   - Whether to reset stats on read or not
92  *   - Signal an overflow?
93  *   - Compressed voistat array
94  */
95 #define	VOI_REQSTATE	0x0001	/* VOI requires VS_STYPE_VOISTATE. */
96 struct voi {
97 	int16_t		id;		/* VOI id. */
98 	enum vsd_dtype	dtype : 8;	/* Data type of the VOI itself. */
99 	int8_t		voistatmaxid;	/* Largest allocated voistat index. */
100 	uint16_t	stats_off;	/* Blob offset for this VOI's stats. */
101 	uint16_t	flags;
102 };
103 
104 /*
105  * Memory for the entire blob is allocated as a slab and then offsets are
106  * maintained to carve up the slab into sections holding different data types.
107  *
108  * Ideas for flags:
109  * - Compressed voi array (trade off memory usage vs search time)
110  * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
111  */
112 struct statsblobv1 {
113 	uint8_t		abi;
114 	uint8_t		endian;
115 	uint16_t	flags;
116 	uint16_t	maxsz;
117 	uint16_t	cursz;
118 	/* Fields from here down are opaque to consumers. */
119 	uint32_t	tplhash;	/* Base template hash ID. */
120 	uint16_t	stats_off;	/* voistat array blob offset. */
121 	uint16_t	statsdata_off;	/* voistatdata array blob offset. */
122 	sbintime_t	created;	/* Blob creation time. */
123 	sbintime_t	lastrst;	/* Time of last reset. */
124 	struct voi	vois[];		/* Array indexed by [voi_id]. */
125 } __aligned(sizeof(void *));
126 _Static_assert(offsetof(struct statsblobv1, cursz) +
127     SIZEOF_MEMBER(struct statsblobv1, cursz) == sizeof(struct statsblob),
128     "statsblobv1 ABI mismatch");
129 
130 struct statsblobv1_tpl {
131 	struct metablob		*mb;
132 	struct statsblobv1	*sb;
133 };
134 
135 /* Context passed to iterator callbacks. */
136 struct sb_iter_ctx {
137 	void		*usrctx;	/* Caller supplied context. */
138 	uint32_t	flags;		/* Flags for current iteration. */
139 	int16_t		vslot;		/* struct voi slot index. */
140 	int8_t		vsslot;		/* struct voistat slot index. */
141 };
142 
143 struct sb_tostrcb_ctx {
144 	struct sbuf		*buf;
145 	struct statsblob_tpl	*tpl;
146 	enum sb_str_fmt	fmt;
147 	uint32_t		flags;
148 };
149 
150 struct sb_visitcb_ctx {
151 	stats_blob_visitcb_t	cb;
152 	void			*usrctx;
153 };
154 
155 /* Stats blob iterator callback. */
156 typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
157     struct voistat *vs, struct sb_iter_ctx *ctx);
158 
159 #ifdef _KERNEL
160 static struct rwlock tpllistlock;
161 RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
162 #define	TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
163 #define	TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
164 #define	TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
165 #define	TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
166 #define	TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
167 #define	TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
168 #define	TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
169 MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
170 #define	stats_free(ptr) free((ptr), M_STATS)
171 #else /* ! _KERNEL */
172 static void stats_constructor(void);
173 static void stats_destructor(void);
174 static pthread_rwlock_t tpllistlock;
175 #define	TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
176 #define	TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
177 #define	TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
178 #define	TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
179 #define	TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
180 #define	TPL_LIST_LOCK_ASSERT() do { } while (0)
181 #define	TPL_LIST_RLOCK_ASSERT() do { } while (0)
182 #define	TPL_LIST_WLOCK_ASSERT() do { } while (0)
183 #ifdef NDEBUG
184 #define	KASSERT(cond, msg) do {} while (0)
185 #define	stats_abort() do {} while (0)
186 #else /* ! NDEBUG */
187 #define	KASSERT(cond, msg) do { \
188 	if (!(cond)) { \
189 		panic msg; \
190 	} \
191 } while (0)
192 #define	stats_abort() abort()
193 #endif /* NDEBUG */
194 #define	stats_free(ptr) free(ptr)
195 #define	panic(fmt, ...) do { \
196 	fprintf(stderr, (fmt), ##__VA_ARGS__); \
197 	stats_abort(); \
198 } while (0)
199 #endif /* _KERNEL */
200 
201 #define	SB_V1_MAXSZ 65535
202 
203 /* Obtain a blob offset pointer. */
204 #define	BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))
205 
206 /*
207  * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
208  * power of 2 size, we can shift instead of divide. The shift amount must be
209  * updated if sizeof(struct voi) ever changes, which the assert should catch.
210  */
211 #define	NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
212     sizeof(struct statsblobv1)) >> 3))
213 _Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
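/*
 * Example: a blob whose stats_off is sizeof(struct statsblobv1) + 32 holds
 * 4 VOIs, as each struct voi occupies 8 bytes.
 */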
214 
215 /* Try to keep names alphanumeric and underscore to simplify JSON compat. */
216 const char *vs_stype2name[VS_NUM_STYPES] = {
217 	[VS_STYPE_VOISTATE] = "VOISTATE",
218 	[VS_STYPE_SUM] = "SUM",
219 	[VS_STYPE_MAX] = "MAX",
220 	[VS_STYPE_MIN] = "MIN",
221 	[VS_STYPE_HIST] = "HIST",
222 	[VS_STYPE_TDGST] = "TDGST",
223 };
224 
225 const char *vs_stype2desc[VS_NUM_STYPES] = {
226 	[VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
227 	[VS_STYPE_SUM] = "Simple arithmetic accumulator",
228 	[VS_STYPE_MAX] = "Maximum observed VOI value",
229 	[VS_STYPE_MIN] = "Minimum observed VOI value",
230 	[VS_STYPE_HIST] = "Histogram of observed VOI values",
231 	[VS_STYPE_TDGST] = "t-digest of observed VOI values",
232 };
233 
234 const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
235 	[VSD_DTYPE_VOISTATE] = "VOISTATE",
236 	[VSD_DTYPE_INT_S32] = "INT_S32",
237 	[VSD_DTYPE_INT_U32] = "INT_U32",
238 	[VSD_DTYPE_INT_S64] = "INT_S64",
239 	[VSD_DTYPE_INT_U64] = "INT_U64",
240 	[VSD_DTYPE_INT_SLONG] = "INT_SLONG",
241 	[VSD_DTYPE_INT_ULONG] = "INT_ULONG",
242 	[VSD_DTYPE_Q_S32] = "Q_S32",
243 	[VSD_DTYPE_Q_U32] = "Q_U32",
244 	[VSD_DTYPE_Q_S64] = "Q_S64",
245 	[VSD_DTYPE_Q_U64] = "Q_U64",
246 	[VSD_DTYPE_CRHIST32] = "CRHIST32",
247 	[VSD_DTYPE_DRHIST32] = "DRHIST32",
248 	[VSD_DTYPE_DVHIST32] = "DVHIST32",
249 	[VSD_DTYPE_CRHIST64] = "CRHIST64",
250 	[VSD_DTYPE_DRHIST64] = "DRHIST64",
251 	[VSD_DTYPE_DVHIST64] = "DVHIST64",
252 	[VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
253 	[VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
254 };
255 
256 const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
257 	[VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
258 	[VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
259 	[VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
260 	[VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
261 	[VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
262 	[VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
263 	[VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
264 	[VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
265 	[VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
266 	[VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
267 	[VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
268 	[VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
269 	[VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
270 	[VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
271 	[VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
272 	[VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
273 	[VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
274 	[VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
275 	[VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
276 };
277 
278 static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
279 	[VSD_DTYPE_VOISTATE] = true,
280 	[VSD_DTYPE_INT_S32] = false,
281 	[VSD_DTYPE_INT_U32] = false,
282 	[VSD_DTYPE_INT_S64] = false,
283 	[VSD_DTYPE_INT_U64] = false,
284 	[VSD_DTYPE_INT_SLONG] = false,
285 	[VSD_DTYPE_INT_ULONG] = false,
286 	[VSD_DTYPE_Q_S32] = false,
287 	[VSD_DTYPE_Q_U32] = false,
288 	[VSD_DTYPE_Q_S64] = false,
289 	[VSD_DTYPE_Q_U64] = false,
290 	[VSD_DTYPE_CRHIST32] = true,
291 	[VSD_DTYPE_DRHIST32] = true,
292 	[VSD_DTYPE_DVHIST32] = true,
293 	[VSD_DTYPE_CRHIST64] = true,
294 	[VSD_DTYPE_DRHIST64] = true,
295 	[VSD_DTYPE_DVHIST64] = true,
296 	[VSD_DTYPE_TDGSTCLUST32] = true,
297 	[VSD_DTYPE_TDGSTCLUST64] = true,
298 };
299 
300 const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
301 	[LIM_MIN] = {
302 		[VSD_DTYPE_VOISTATE] = {0},
303 		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
304 		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
305 		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
306 		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
307 		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
308 		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
309 		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
310 		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
311 		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
312 		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
313 	},
314 	[LIM_MAX] = {
315 		[VSD_DTYPE_VOISTATE] = {0},
316 		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
317 		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
318 		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
319 		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
320 		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
321 		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
322 		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
323 		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
324 		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
325 		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
326 	}
327 };
328 
329 /* tpllistlock protects tpllist and ntpl */
330 static uint32_t ntpl;
331 static struct statsblob_tpl **tpllist;
332 
333 static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
334     int flags);
335 /* static void stats_v1_blob_finalise(struct statsblobv1 *sb); */
336 static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
337     uint32_t flags);
338 static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
339     int newvoistatbytes, int newvoistatdatabytes);
340 static void stats_v1_blob_iter(struct statsblobv1 *sb,
341     stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
342 static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
343     struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
344 
345 static inline int
346 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
347 {
348 
349 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
350 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
351 	    Q_RELPREC(c1->mu, c2->mu)));
352 
353 	return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
354 }
355 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
356 
357 static inline int
358 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
359 {
360 
361 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
362 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
363 	    Q_RELPREC(c1->mu, c2->mu)));
364 
365 	return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
366 }
367 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
368 
369 #ifdef DIAGNOSTIC
370 RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
371 RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
372 #endif
373 
374 static inline sbintime_t
375 stats_sbinuptime(void)
376 {
377 	sbintime_t sbt;
378 #ifdef _KERNEL
379 
380 	sbt = sbinuptime();
381 #else /* ! _KERNEL */
382 	struct timespec tp;
383 
384 	clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
385 	sbt = tstosbt(tp);
386 #endif /* _KERNEL */
387 
388 	return (sbt);
389 }
390 
391 static inline void *
392 stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
393 {
394 
395 #ifdef _KERNEL
396 	/* Default to M_NOWAIT if neither M_NOWAIT nor M_WAITOK is set. */
397 	if (!(flags & (M_WAITOK | M_NOWAIT)))
398 		flags |= M_NOWAIT;
399 	ptr = realloc(ptr, newsz, M_STATS, flags);
400 #else /* ! _KERNEL */
401 	ptr = realloc(ptr, newsz);
402 	if ((flags & M_ZERO) && ptr != NULL) {
403 		if (oldsz == 0)
404 			memset(ptr, '\0', newsz);
405 		else if (newsz > oldsz)
406 			memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
407 	}
408 #endif /* _KERNEL */
409 
410 	return (ptr);
411 }
412 
413 static inline char *
414 stats_strdup(const char *s,
415 #ifdef _KERNEL
416     int flags)
417 {
418 	char *copy;
419 	size_t len;
420 
421 	if (!(flags & (M_WAITOK | M_NOWAIT)))
422 		flags |= M_NOWAIT;
423 
424 	len = strlen(s) + 1;
425 	if ((copy = malloc(len, M_STATS, flags)) != NULL)
426 		bcopy(s, copy, len);
427 
428 	return (copy);
429 #else
430     int flags __unused)
431 {
432 	return (strdup(s));
433 #endif
434 }
435 
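/*
 * Recompute a template's hash over its name, the name of each VOI that has
 * metadata, and the raw template blob contents. The caller must hold the
 * template list write lock.
 */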
436 static inline void
437 stats_tpl_update_hash(struct statsblob_tpl *tpl)
438 {
439 
440 	TPL_LIST_WLOCK_ASSERT();
441 	tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
442 	for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
443 		if (tpl->mb->voi_meta[voi_id].name != NULL)
444 			tpl->mb->tplhash = hash32_str(
445 			    tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
446 	}
447 	tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
448 	    tpl->mb->tplhash);
449 }
450 
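/*
 * Integer base^exp computed by binary exponentiation (square-and-multiply).
 * Overflow silently wraps per normal unsigned arithmetic.
 */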
451 static inline uint64_t
452 stats_pow_u64(uint64_t base, uint64_t exp)
453 {
454 	uint64_t result = 1;
455 
456 	while (exp) {
457 		if (exp & 1)
458 			result *= base;
459 		exp >>= 1;
460 		base *= base;
461 	}
462 
463 	return (result);
464 }
465 
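/*
 * Compute the upper bound of histogram bucket "curbkt" from its lower bound
 * according to the configured bucketing scheme (linear, exponential,
 * linear-exponential or user-specified). Returns 0 on success, EINVAL for a
 * zero step with a non-user scheme, or an error from Q_QADDI() for Q types.
 */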
466 static inline int
467 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
468     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
469 {
470 	uint64_t step = 0;
471 	int error = 0;
472 
473 	switch (info->scheme) {
474 	case BKT_LIN:
475 		step = info->lin.stepinc;
476 		break;
477 	case BKT_EXP:
478 		step = stats_pow_u64(info->exp.stepbase,
479 		    info->exp.stepexp + curbkt);
480 		break;
481 	case BKT_LINEXP:
482 		{
483 		uint64_t curstepexp = 1;
484 
485 		switch (info->voi_dtype) {
486 		case VSD_DTYPE_INT_S32:
487 			while ((int32_t)stats_pow_u64(info->linexp.stepbase,
488 			    curstepexp) <= bkt_lb->int32.s32)
489 				curstepexp++;
490 			break;
491 		case VSD_DTYPE_INT_U32:
492 			while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
493 			    curstepexp) <= bkt_lb->int32.u32)
494 				curstepexp++;
495 			break;
496 		case VSD_DTYPE_INT_S64:
497 			while ((int64_t)stats_pow_u64(info->linexp.stepbase,
498 			    curstepexp) <= bkt_lb->int64.s64)
499 				curstepexp++;
500 			break;
501 		case VSD_DTYPE_INT_U64:
502 			while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
503 			    curstepexp) <= bkt_lb->int64.u64)
504 				curstepexp++;
505 			break;
506 		case VSD_DTYPE_INT_SLONG:
507 			while ((long)stats_pow_u64(info->linexp.stepbase,
508 			    curstepexp) <= bkt_lb->intlong.slong)
509 				curstepexp++;
510 			break;
511 		case VSD_DTYPE_INT_ULONG:
512 			while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
513 			    curstepexp) <= bkt_lb->intlong.ulong)
514 				curstepexp++;
515 			break;
516 		case VSD_DTYPE_Q_S32:
517 			while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
518 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
				curstepexp++;
519 			break;
520 		case VSD_DTYPE_Q_U32:
521 			while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
522 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
				curstepexp++;
523 			break;
524 		case VSD_DTYPE_Q_S64:
525 			while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
526 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
527 				curstepexp++;
528 			break;
529 		case VSD_DTYPE_Q_U64:
530 			while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
531 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
532 				curstepexp++;
533 			break;
534 		default:
535 			break;
536 		}
537 
538 		step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
539 		    info->linexp.linstepdiv;
540 		if (step == 0)
541 			step = 1;
542 		break;
543 		}
544 	default:
545 		break;
546 	}
547 
548 	if (info->scheme == BKT_USR) {
549 		*bkt_lb = info->usr.bkts[curbkt].lb;
550 		*bkt_ub = info->usr.bkts[curbkt].ub;
551 	} else if (step != 0) {
552 		switch (info->voi_dtype) {
553 		case VSD_DTYPE_INT_S32:
554 			bkt_ub->int32.s32 += (int32_t)step;
555 			break;
556 		case VSD_DTYPE_INT_U32:
557 			bkt_ub->int32.u32 += (uint32_t)step;
558 			break;
559 		case VSD_DTYPE_INT_S64:
560 			bkt_ub->int64.s64 += (int64_t)step;
561 			break;
562 		case VSD_DTYPE_INT_U64:
563 			bkt_ub->int64.u64 += (uint64_t)step;
564 			break;
565 		case VSD_DTYPE_INT_SLONG:
566 			bkt_ub->intlong.slong += (long)step;
567 			break;
568 		case VSD_DTYPE_INT_ULONG:
569 			bkt_ub->intlong.ulong += (unsigned long)step;
570 			break;
571 		case VSD_DTYPE_Q_S32:
572 			error = Q_QADDI(&bkt_ub->q32.sq32, step);
573 			break;
574 		case VSD_DTYPE_Q_U32:
575 			error = Q_QADDI(&bkt_ub->q32.uq32, step);
576 			break;
577 		case VSD_DTYPE_Q_S64:
578 			error = Q_QADDI(&bkt_ub->q64.sq64, step);
579 			break;
580 		case VSD_DTYPE_Q_U64:
581 			error = Q_QADDI(&bkt_ub->q64.uq64, step);
582 			break;
583 		default:
584 			break;
585 		}
586 	} else { /* info->scheme != BKT_USR && step == 0 */
587 		return (EINVAL);
588 	}
589 
590 	return (error);
591 }
592 
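/*
 * Determine how many buckets a histogram needs to span [info->lb, info->ub]
 * under the configured bucketing scheme, including extra out-of-band buckets
 * when the LBOUND/UBOUND infinity flags are set. Returns 0 on error.
 */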
593 static uint32_t
594 stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
595 {
596 	struct voistatdata_numeric bkt_lb, bkt_ub;
597 	uint32_t nbkts;
598 	int done;
599 
600 	if (info->scheme == BKT_USR) {
601 		/* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
602 		info->lb = info->usr.bkts[0].lb;
603 		info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
604 	}
605 
606 	nbkts = 0;
607 	done = 0;
608 	bkt_ub = info->lb;
609 
610 	do {
611 		bkt_lb = bkt_ub;
612 		if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
613 			return (0);
614 
615 		if (info->scheme == BKT_USR)
616 			done = (nbkts == info->usr.nbkts);
617 		else {
618 			switch (info->voi_dtype) {
619 			case VSD_DTYPE_INT_S32:
620 				done = (bkt_ub.int32.s32 > info->ub.int32.s32);
621 				break;
622 			case VSD_DTYPE_INT_U32:
623 				done = (bkt_ub.int32.u32 > info->ub.int32.u32);
624 				break;
625 			case VSD_DTYPE_INT_S64:
626 				done = (bkt_ub.int64.s64 > info->ub.int64.s64);
627 				break;
628 			case VSD_DTYPE_INT_U64:
629 				done = (bkt_ub.int64.u64 > info->ub.int64.u64);
630 				break;
631 			case VSD_DTYPE_INT_SLONG:
632 				done = (bkt_ub.intlong.slong >
633 				    info->ub.intlong.slong);
634 				break;
635 			case VSD_DTYPE_INT_ULONG:
636 				done = (bkt_ub.intlong.ulong >
637 				    info->ub.intlong.ulong);
638 				break;
639 			case VSD_DTYPE_Q_S32:
640 				done = Q_QGTQ(bkt_ub.q32.sq32,
641 				    info->ub.q32.sq32);
642 				break;
643 			case VSD_DTYPE_Q_U32:
644 				done = Q_QGTQ(bkt_ub.q32.uq32,
645 				    info->ub.q32.uq32);
646 				break;
647 			case VSD_DTYPE_Q_S64:
648 				done = Q_QGTQ(bkt_ub.q64.sq64,
649 				    info->ub.q64.sq64);
650 				break;
651 			case VSD_DTYPE_Q_U64:
652 				done = Q_QGTQ(bkt_ub.q64.uq64,
653 				    info->ub.q64.uq64);
654 				break;
655 			default:
656 				return (0);
657 			}
658 		}
659 	} while (!done);
660 
661 	if (info->flags & VSD_HIST_LBOUND_INF)
662 		nbkts++;
663 	if (info->flags & VSD_HIST_UBOUND_INF)
664 		nbkts++;
665 
666 	return (nbkts);
667 }
668 
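/*
 * Histogram helper for voistatspec initialisation: size and allocate the
 * initial histogram data, populate each bucket's bound(s), and widen the
 * first/last buckets towards the data type limits when the LBOUND/UBOUND
 * infinity flags are set.
 */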
669 int
670 stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
671     struct vss_hist_hlpr_info *info)
672 {
673 	struct voistatdata_hist *hist;
674 	struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
675 	    *ubinfbktlb, *ubinfbktub;
676 	uint32_t bkt, nbkts, nloop;
677 
678 	if (vss == NULL || info == NULL || (info->flags &
679 	(VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
680 	VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
681 		return (EINVAL);
682 
683 	info->voi_dtype = voi_dtype;
684 
685 	if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
686 		return (EINVAL);
687 
688 	switch (info->hist_dtype) {
689 	case VSD_DTYPE_CRHIST32:
690 		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
691 		break;
692 	case VSD_DTYPE_DRHIST32:
693 		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
694 		break;
695 	case VSD_DTYPE_DVHIST32:
696 		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
697 		break;
698 	case VSD_DTYPE_CRHIST64:
699 		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
700 		break;
701 	case VSD_DTYPE_DRHIST64:
702 		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
703 		break;
704 	case VSD_DTYPE_DVHIST64:
705 		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
706 		break;
707 	default:
708 		return (EINVAL);
709 	}
710 
711 	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
712 	if (vss->iv == NULL)
713 		return (ENOMEM);
714 
715 	hist = (struct voistatdata_hist *)vss->iv;
716 	bkt_ub = info->lb;
717 
718 	for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
719 	    bkt < nbkts;
720 	    bkt++, nloop++) {
721 		bkt_lb = bkt_ub;
722 		if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
723 			return (EINVAL);
724 
725 		switch (info->hist_dtype) {
726 		case VSD_DTYPE_CRHIST32:
727 			VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
728 			break;
729 		case VSD_DTYPE_DRHIST32:
730 			VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
731 			VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
732 			break;
733 		case VSD_DTYPE_DVHIST32:
734 			VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
735 			break;
736 		case VSD_DTYPE_CRHIST64:
737 			VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
738 			break;
739 		case VSD_DTYPE_DRHIST64:
740 			VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
741 			VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
742 			break;
743 		case VSD_DTYPE_DVHIST64:
744 			VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
745 			break;
746 		default:
747 			return (EINVAL);
748 		}
749 	}
750 
751 	lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;
752 
753 	switch (info->hist_dtype) {
754 	case VSD_DTYPE_CRHIST32:
755 		lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
756 		ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
757 		break;
758 	case VSD_DTYPE_DRHIST32:
759 		lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
760 		lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
761 		ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
762 		ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
763 		break;
764 	case VSD_DTYPE_CRHIST64:
765 		lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
766 		ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
767 		break;
768 	case VSD_DTYPE_DRHIST64:
769 		lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
770 		lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
771 		ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
772 		ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
773 		break;
774 	case VSD_DTYPE_DVHIST32:
775 	case VSD_DTYPE_DVHIST64:
776 		break;
777 	default:
778 		return (EINVAL);
779 	}
780 
781 	if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
782 		*lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
783 		/*
784 		 * Assignment from numeric_limit array for Q types assigns max
785 		 * possible integral/fractional value for underlying data type,
786 		 * but we must set control bits for this specific histogram per
787 		 * the user's choice of fractional bits, which we extract from
788 		 * info->lb.
789 		 */
790 		if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
791 		    info->voi_dtype == VSD_DTYPE_Q_U32) {
792 			/* Signedness doesn't matter for setting control bits. */
793 			Q_SCVAL(lbinfbktlb->q32.sq32,
794 			    Q_GCVAL(info->lb.q32.sq32));
795 		} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
796 		    info->voi_dtype == VSD_DTYPE_Q_U64) {
797 			/* Signedness doesn't matter for setting control bits. */
798 			Q_SCVAL(lbinfbktlb->q64.sq64,
799 			    Q_GCVAL(info->lb.q64.sq64));
800 		}
801 		if (lbinfbktub)
802 			*lbinfbktub = info->lb;
803 	}
804 	if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
805 		*ubinfbktlb = bkt_lb;
806 		if (ubinfbktub) {
807 			*ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
808 			if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
809 			    info->voi_dtype == VSD_DTYPE_Q_U32) {
810 				Q_SCVAL(ubinfbktub->q32.sq32,
811 				    Q_GCVAL(info->lb.q32.sq32));
812 			} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
813 			    info->voi_dtype == VSD_DTYPE_Q_U64) {
814 				Q_SCVAL(ubinfbktub->q64.sq64,
815 				    Q_GCVAL(info->lb.q64.sq64));
816 			}
817 		}
818 	}
819 
820 	return (0);
821 }
822 
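/*
 * t-digest helper for voistatspec initialisation: allocate zeroed storage
 * for info->nctds centroids and initialise each centroid's mu to Q zero with
 * the requested precision.
 */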
823 int
824 stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
825     struct vss_tdgst_hlpr_info *info)
826 {
827 	struct voistatdata_tdgst *tdgst;
828 	struct ctdth32 *ctd32tree;
829 	struct ctdth64 *ctd64tree;
830 	struct voistatdata_tdgstctd32 *ctd32;
831 	struct voistatdata_tdgstctd64 *ctd64;
832 
833 	info->voi_dtype = voi_dtype;
834 
835 	switch (info->tdgst_dtype) {
836 	case VSD_DTYPE_TDGSTCLUST32:
837 		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
838 		break;
839 	case VSD_DTYPE_TDGSTCLUST64:
840 		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
841 		break;
842 	default:
843 		return (EINVAL);
844 	}
845 
846 	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
847 	if (vss->iv == NULL)
848 		return (ENOMEM);
849 
850 	tdgst = (struct voistatdata_tdgst *)vss->iv;
851 
852 	switch (info->tdgst_dtype) {
853 	case VSD_DTYPE_TDGSTCLUST32:
854 		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
855 		ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
856 			Q_INI(&ctd32->mu, 0, 0, info->prec);
857 		}
858 		break;
859 	case VSD_DTYPE_TDGSTCLUST64:
860 		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
861 		ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
862 			Q_INI(&ctd64->mu, 0, 0, info->prec);
863 		}
864 		break;
865 	default:
866 		return (EINVAL);
867 	}
868 
869 	return (0);
870 }
871 
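/*
 * Simple numeric stat helper: seed SUM with zero, MIN with the data type's
 * maximum and MAX with its minimum so that the first recorded value always
 * updates the stat.
 */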
872 int
873 stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
874     struct vss_numeric_hlpr_info *info)
875 {
876 	struct voistatdata_numeric iv;
877 
878 	switch (vss->stype) {
879 	case VS_STYPE_SUM:
880 		iv = stats_ctor_vsd_numeric(0);
881 		break;
882 	case VS_STYPE_MIN:
883 		iv = numeric_limits[LIM_MAX][voi_dtype];
884 		break;
885 	case VS_STYPE_MAX:
886 		iv = numeric_limits[LIM_MIN][voi_dtype];
887 		break;
888 	default:
889 		return (EINVAL);
890 	}
891 
892 	vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
893 	if (vss->iv == NULL)
894 		return (ENOMEM);
895 
896 	vss->vs_dtype = voi_dtype;
897 	vss->vsdsz = vsd_dtype2size[voi_dtype];
898 	switch (voi_dtype) {
899 	case VSD_DTYPE_INT_S32:
900 		*((int32_t *)vss->iv) = iv.int32.s32;
901 		break;
902 	case VSD_DTYPE_INT_U32:
903 		*((uint32_t *)vss->iv) = iv.int32.u32;
904 		break;
905 	case VSD_DTYPE_INT_S64:
906 		*((int64_t *)vss->iv) = iv.int64.s64;
907 		break;
908 	case VSD_DTYPE_INT_U64:
909 		*((uint64_t *)vss->iv) = iv.int64.u64;
910 		break;
911 	case VSD_DTYPE_INT_SLONG:
912 		*((long *)vss->iv) = iv.intlong.slong;
913 		break;
914 	case VSD_DTYPE_INT_ULONG:
915 		*((unsigned long *)vss->iv) = iv.intlong.ulong;
916 		break;
917 	case VSD_DTYPE_Q_S32:
918 		*((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
919 		    Q_CTRLINI(info->prec));
920 		break;
921 	case VSD_DTYPE_Q_U32:
922 		*((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
923 		    Q_CTRLINI(info->prec));
924 		break;
925 	case VSD_DTYPE_Q_S64:
926 		*((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
927 		    Q_CTRLINI(info->prec));
928 		break;
929 	case VSD_DTYPE_Q_U64:
930 		*((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
931 		    Q_CTRLINI(info->prec));
932 		break;
933 	default:
934 		break;
935 	}
936 
937 	return (0);
938 }
939 
940 int
941 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
942     struct voistatspec *vss)
943 {
944 	int i, ret;
945 
946 	for (i = nvss - 1; i >= 0; i--) {
947 		if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
948 		    vss[i].hlprinfo)) != 0)
949 			return (ret);
950 	}
951 
952 	return (0);
953 }
954 
955 void
956 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
957 {
958 	int i;
959 
960 	for (i = nvss - 1; i >= 0; i--) {
961 		if (vss[i].hlpr) {
962 			stats_free((void *)vss[i].iv);
963 			vss[i].iv = NULL;
964 		}
965 	}
966 }
967 
968 int
969 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
970 {
971 	int error;
972 
973 	error = 0;
974 
975 	TPL_LIST_WLOCK();
976 	if (tpl_id < 0 || tpl_id >= (int)ntpl) {
977 		error = ENOENT;
978 	} else {
979 		*tpl = tpllist[tpl_id];
980 		/* XXXLAS: Acquire refcount on tpl. */
981 	}
982 	TPL_LIST_WUNLOCK();
983 
984 	return (error);
985 }
986 
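/*
 * Search the template list for a template matching "name" (and "hash", if
 * non-zero), or matching hash alone when name is NULL. Returns the slot id
 * of the newest match, or -ESRCH if none is found.
 */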
987 int
988 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
989 {
990 	int i, tpl_id;
991 
992 	tpl_id = -ESRCH;
993 
994 	TPL_LIST_RLOCK();
995 	for (i = ntpl - 1; i >= 0; i--) {
996 		if (name != NULL) {
997 			if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
998 			    strncmp(name, tpllist[i]->mb->tplname,
999 			    TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
1000 			    tpllist[i]->mb->tplhash)) {
1001 				tpl_id = i;
1002 				break;
1003 			}
1004 		} else if (hash == tpllist[i]->mb->tplhash) {
1005 			tpl_id = i;
1006 			break;
1007 		}
1008 	}
1009 	TPL_LIST_RUNLOCK();
1010 
1011 	return (tpl_id);
1012 }
1013 
1014 int
1015 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1016 {
1017 	int error;
1018 
1019 	error = 0;
1020 
1021 	TPL_LIST_RLOCK();
1022 	if (tpl_id < ntpl) {
1023 		if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1024 			strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1025 		else
1026 			error = EOVERFLOW;
1027 	} else
1028 		error = ENOENT;
1029 	TPL_LIST_RUNLOCK();
1030 
1031 	return (error);
1032 }
1033 
1034 int
1035 stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
1036     void *seed_bytes, size_t seed_len)
1037 {
1038 	uint32_t cum_pct, rnd_pct;
1039 	int i;
1040 
1041 	cum_pct = 0;
1042 
1043 	/*
1044 	 * Choose a pseudorandom or seeded number in range [0,100] and use
1045 	 * it to make a sampling decision and template selection where required.
1046 	 * If no seed is supplied, a PRNG is used to generate a pseudorandom
1047 	 * number so that every selection is independent. If a seed is supplied,
1048 	 * the caller desires random selection across different seeds, but
1049 	 * deterministic selection given the same seed. This is achieved by
1050 	 * hashing the seed and using the hash as the random number source.
1051 	 *
1052 	 * XXXLAS: Characterise hash function output distribution.
1053 	 */
1054 	if (seed_bytes == NULL)
1055 		rnd_pct = random() / (INT32_MAX / 100);
1056 	else
1057 		rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
1058 		    (UINT32_MAX / 100U);
1059 
1060 	/*
1061 	 * We map the randomly selected percentage onto the interval [0,100]
1062 	 * consisting of the cumulatively summed template sampling percentages.
1063 	 * The difference between the cumulative sum of all template sampling
1064 	 * percentages and 100 is treated as a NULL assignment i.e. no stats
1065 	 * template will be assigned, and -1 returned instead.
1066 	 */
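	/*
	 * Worked example (hypothetical rates): with two entries of 30% and
	 * 20%, rnd_pct 0-30 selects slot 0, 31-50 selects slot 1, and 51-100
	 * yields -1 (no template assigned).
	 */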
1067 	for (i = 0; i < nrates; i++) {
1068 		cum_pct += rates[i].tpl_sample_pct;
1069 
1070 		KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
1071 		    cum_pct));
1072 		if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
1073 			continue;
1074 
1075 		return (rates[i].tpl_slot_id);
1076 	}
1077 
1078 	return (-1);
1079 }
1080 
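/*
 * Copy src into dst, preserving dst's maxsz field. With SB_CLONE_ALLOCDST a
 * suitably sized dst is allocated; with SB_CLONE_USRDST{,NOFAULT} (kernel
 * only) the copy goes to a user space dst. If dst cannot hold all of src,
 * only the header is copied and EOVERFLOW is returned.
 */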
1081 int
1082 stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
1083     struct statsblobv1 *src, uint32_t flags)
1084 {
1085 	int error;
1086 
1087 	error = 0;
1088 
1089 	if (src == NULL || dst == NULL ||
1090 	    src->cursz < sizeof(struct statsblob) ||
1091 	    ((flags & SB_CLONE_ALLOCDST) &&
1092 	    (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
1093 		error = EINVAL;
1094 	} else if (flags & SB_CLONE_ALLOCDST) {
1095 		*dst = stats_realloc(NULL, 0, src->cursz, 0);
1096 		if (*dst)
1097 			(*dst)->maxsz = dstmaxsz = src->cursz;
1098 		else
1099 			error = ENOMEM;
1100 	} else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
1101 		error = EINVAL;
1102 	}
1103 
1104 	if (!error) {
1105 		size_t postcurszlen;
1106 
1107 		/*
1108 		 * Clone src into dst except for the maxsz field. If dst is too
1109 		 * small to hold all of src, only copy src's header and return
1110 		 * EOVERFLOW.
1111 		 */
1112 #ifdef _KERNEL
1113 		if (flags & SB_CLONE_USRDSTNOFAULT)
1114 			copyout_nofault(src, *dst,
1115 			    offsetof(struct statsblob, maxsz));
1116 		else if (flags & SB_CLONE_USRDST)
1117 			copyout(src, *dst, offsetof(struct statsblob, maxsz));
1118 		else
1119 #endif
1120 			memcpy(*dst, src, offsetof(struct statsblob, maxsz));
1121 
1122 		if (dstmaxsz >= src->cursz) {
1123 			postcurszlen = src->cursz -
1124 			    offsetof(struct statsblob, cursz);
1125 		} else {
1126 			error = EOVERFLOW;
1127 			postcurszlen = sizeof(struct statsblob) -
1128 			    offsetof(struct statsblob, cursz);
1129 		}
1130 #ifdef _KERNEL
1131 		if (flags & SB_CLONE_USRDSTNOFAULT)
1132 			copyout_nofault(&(src->cursz), &((*dst)->cursz),
1133 			    postcurszlen);
1134 		else if (flags & SB_CLONE_USRDST)
1135 			copyout(&(src->cursz), &((*dst)->cursz), postcurszlen);
1136 		else
1137 #endif
1138 			memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
1139 	}
1140 
1141 	return (error);
1142 }
1143 
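/*
 * Allocate an empty ABI v1 template with the given unique name, record the
 * host byte order, and append it to the global template list. Returns the
 * new template's list index, or a negative errno on failure.
 */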
1144 int
1145 stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
1146 {
1147 	struct statsblobv1_tpl *tpl, **newtpllist;
1148 	struct statsblobv1 *tpl_sb;
1149 	struct metablob *tpl_mb;
1150 	int tpl_id;
1151 
1152 	if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
1153 		return (-EINVAL);
1154 
1155 	if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
1156 		return (-EEXIST);
1157 
1158 	tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
1159 	tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
1160 	tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);
1161 
1162 	if (tpl_mb != NULL && name != NULL)
1163 		tpl_mb->tplname = stats_strdup(name, 0);
1164 
1165 	if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
1166 	    tpl_mb->tplname == NULL) {
1167 		stats_free(tpl);
1168 		stats_free(tpl_sb);
1169 		if (tpl_mb != NULL) {
1170 			stats_free(tpl_mb->tplname);
1171 			stats_free(tpl_mb);
1172 		}
1173 		return (-ENOMEM);
1174 	}
1175 
1176 	tpl->mb = tpl_mb;
1177 	tpl->sb = tpl_sb;
1178 
1179 	tpl_sb->abi = STATS_ABI_V1;
1180 	tpl_sb->endian =
1181 #if BYTE_ORDER == LITTLE_ENDIAN
1182 	    SB_LE;
1183 #elif BYTE_ORDER == BIG_ENDIAN
1184 	    SB_BE;
1185 #else
1186 	    SB_UE;
1187 #endif
1188 	tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
1189 	tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);
1190 
1191 	TPL_LIST_WLOCK();
1192 	newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
1193 	    (ntpl + 1) * sizeof(void *), 0);
1194 	if (newtpllist != NULL) {
1195 		tpl_id = ntpl++;
1196 		tpllist = (struct statsblob_tpl **)newtpllist;
1197 		tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
1198 		stats_tpl_update_hash(tpllist[tpl_id]);
1199 	} else {
1200 		stats_free(tpl);
1201 		stats_free(tpl_sb);
1202 		if (tpl_mb != NULL) {
1203 			stats_free(tpl_mb->tplname);
1204 			stats_free(tpl_mb);
1205 		}
1206 		tpl_id = -ENOMEM;
1207 	}
1208 	TPL_LIST_WUNLOCK();
1209 
1210 	return (tpl_id);
1211 }
1212 
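/*
 * Add a VOI and its associated stats (described by the vss array) to the
 * template identified by tpl_id: validate the specs, grow the template blob
 * and metadata as required, initialise the voi/voistat slots, copy in each
 * spec's initial data, and refresh the template hash. Expanding an existing
 * VOI is not yet supported and returns EOPNOTSUPP.
 */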
1213 int
1214 stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
1215     enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
1216     uint32_t flags)
1217 {
1218 	struct voi *voi;
1219 	struct voistat *tmpstat;
1220 	struct statsblobv1 *tpl_sb;
1221 	struct metablob *tpl_mb;
1222 	int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
1223 	    newvoistatdatabytes, newvoistatmaxid;
1224 	uint32_t nbytes;
1225 
1226 	if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
1227 	    nvss == 0 || vss == NULL)
1228 		return (EINVAL);
1229 
1230 	error = nbytes = newvoibytes = newvoistatbytes =
1231 	    newvoistatdatabytes = 0;
1232 	newvoistatmaxid = -1;
1233 
1234 	/* Calculate the number of bytes required for the new voistats. */
1235 	for (i = nvss - 1; i >= 0; i--) {
1236 		if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
1237 		    vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
1238 		    vss[i].iv == NULL || vss[i].vsdsz == 0)
1239 			return (EINVAL);
1240 		if ((int)vss[i].stype > newvoistatmaxid)
1241 			newvoistatmaxid = vss[i].stype;
1242 		newvoistatdatabytes += vss[i].vsdsz;
1243 	}
1244 
1245 	if (flags & SB_VOI_RELUPDATE) {
1246 		/* XXXLAS: VOI state bytes may need to vary based on stat types. */
1247 		newvoistatdatabytes += sizeof(struct voistatdata_voistate);
1248 	}
1249 	nbytes += newvoistatdatabytes;
1250 
1251 	TPL_LIST_WLOCK();
1252 	if (tpl_id < ntpl) {
1253 		tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
1254 		tpl_mb = tpllist[tpl_id]->mb;
1255 
1256 		if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
1257 			/* Adding a new VOI and associated stats. */
1258 			if (voi_id >= NVOIS(tpl_sb)) {
1259 				/* We need to grow the tpl_sb->vois array. */
1260 				newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
1261 				    sizeof(struct voi);
1262 				nbytes += newvoibytes;
1263 			}
1264 			newvoistatbytes =
1265 			    (newvoistatmaxid + 1) * sizeof(struct voistat);
1266 		} else {
1267 			/* Adding stats to an existing VOI. */
1268 			if (newvoistatmaxid >
1269 			    tpl_sb->vois[voi_id].voistatmaxid) {
1270 				newvoistatbytes = (newvoistatmaxid -
1271 				    tpl_sb->vois[voi_id].voistatmaxid) *
1272 				    sizeof(struct voistat);
1273 			}
1274 			/* XXXLAS: KPI does not yet support expanding VOIs. */
1275 			error = EOPNOTSUPP;
1276 		}
1277 		nbytes += newvoistatbytes;
1278 
1279 		if (!error && newvoibytes > 0) {
1280 			struct voi_meta *voi_meta = tpl_mb->voi_meta;
1281 
1282 			voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
1283 			    0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
1284 			    (1 + voi_id) * sizeof(struct voi_meta),
1285 			    M_ZERO);
1286 
1287 			if (voi_meta == NULL)
1288 				error = ENOMEM;
1289 			else
1290 				tpl_mb->voi_meta = voi_meta;
1291 		}
1292 
1293 		if (!error) {
1294 			/* NB: Resizing can change where tpl_sb points. */
1295 			error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
1296 			    newvoistatbytes, newvoistatdatabytes);
1297 		}
1298 
1299 		if (!error) {
1300 			tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
1301 			    0);
1302 			if (tpl_mb->voi_meta[voi_id].name == NULL)
1303 				error = ENOMEM;
1304 		}
1305 
1306 		if (!error) {
1307 			/* Update the template list with the resized pointer. */
1308 			tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;
1309 
1310 			/* Update the template. */
1311 			voi = &tpl_sb->vois[voi_id];
1312 
1313 			if (voi->id < 0) {
1314 				/* VOI is new and needs to be initialised. */
1315 				voi->id = voi_id;
1316 				voi->dtype = voi_dtype;
1317 				voi->stats_off = tpl_sb->stats_off;
1318 				if (flags & SB_VOI_RELUPDATE)
1319 					voi->flags |= VOI_REQSTATE;
1320 			} else {
1321 				/*
1322 				 * XXXLAS: When this else block is written, the
1323 				 * "KPI does not yet support expanding VOIs"
1324 				 * error earlier in this function can be
1325 				 * removed. What is required here is to shuffle
1326 				 * the voistat array such that the new stats for
1327 				 * the voi are contiguous, which will displace
1328 				 * stats for other vois that reside after the
1329 				 * voi being updated. The other vois then need
1330 				 * to have their stats_off adjusted post
1331 				 * shuffle.
1332 				 */
1333 			}
1334 
1335 			voi->voistatmaxid = newvoistatmaxid;
1336 			newstatdataidx = 0;
1337 
1338 			if (voi->flags & VOI_REQSTATE) {
1339 				/* Initialise the voistate stat in slot 0. */
1340 				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
1341 				tmpstat->stype = VS_STYPE_VOISTATE;
1342 				tmpstat->flags = 0;
1343 				tmpstat->dtype = VSD_DTYPE_VOISTATE;
1344 				newstatdataidx = tmpstat->dsz =
1345 				    sizeof(struct voistatdata_numeric);
1346 				tmpstat->data_off = tpl_sb->statsdata_off;
1347 			}
1348 
1349 			for (i = 0; (uint32_t)i < nvss; i++) {
1350 				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
1351 				    (vss[i].stype * sizeof(struct voistat)));
1352 				KASSERT(tmpstat->stype < 0, ("voistat %p "
1353 				    "already initialised", tmpstat));
1354 				tmpstat->stype = vss[i].stype;
1355 				tmpstat->flags = vss[i].flags;
1356 				tmpstat->dtype = vss[i].vs_dtype;
1357 				tmpstat->dsz = vss[i].vsdsz;
1358 				tmpstat->data_off = tpl_sb->statsdata_off +
1359 				    newstatdataidx;
1360 				memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
1361 				    vss[i].iv, vss[i].vsdsz);
1362 				newstatdataidx += vss[i].vsdsz;
1363 			}
1364 
1365 			/* Update the template version hash. */
1366 			stats_tpl_update_hash(tpllist[tpl_id]);
1367 			/* XXXLAS: Confirm tpl name/hash pair remains unique. */
1368 		}
1369 	} else
1370 		error = EINVAL;
1371 	TPL_LIST_WUNLOCK();
1372 
1373 	return (error);
1374 }
1375 
1376 struct statsblobv1 *
1377 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1378 {
1379 	struct statsblobv1 *sb;
1380 	int error;
1381 
1382 	sb = NULL;
1383 
1384 	TPL_LIST_RLOCK();
1385 	if (tpl_id < ntpl) {
1386 		sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1387 		if (sb != NULL) {
1388 			sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1389 			error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1390 		} else
1391 			error = ENOMEM;
1392 
1393 		if (error) {
1394 			stats_free(sb);
1395 			sb = NULL;
1396 		}
1397 	}
1398 	TPL_LIST_RUNLOCK();
1399 
1400 	return (sb);
1401 }
1402 
1403 void
1404 stats_v1_blob_destroy(struct statsblobv1 *sb)
1405 {
1406 
1407 	stats_free(sb);
1408 }
1409 
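/*
 * Return a pointer to the requested stat's data within the blob (no copy is
 * made), along with its data type and size if requested. The returned
 * pointer aliases the blob's own storage.
 */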
1410 int
1411 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1412     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1413     size_t *retvsdsz)
1414 {
1415 	struct voi *v;
1416 	struct voistat *vs;
1417 
1418 	if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1419 	    voi_id >= NVOIS(sb))
1420 		return (EINVAL);
1421 
1422 	v = &sb->vois[voi_id];
1423 	if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1424 		return (EINVAL);
1425 
1426 	vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1427 	*retvsd = BLOB_OFFSET(sb, vs->data_off);
1428 	if (retdtype != NULL)
1429 		*retdtype = vs->dtype;
1430 	if (retvsdsz != NULL)
1431 		*retvsdsz = vs->dsz;
1432 
1433 	return (0);
1434 }
1435 
1436 int
1437 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1438 {
1439 	int error;
1440 
1441 	error = 0;
1442 
1443 	TPL_LIST_RLOCK();
1444 	if (sb == NULL || tpl_id >= ntpl) {
1445 		error = EINVAL;
1446 	} else {
1447 		error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1448 	}
1449 	TPL_LIST_RUNLOCK();
1450 
1451 	return (error);
1452 }
1453 
1454 static inline int
1455 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1456     uint32_t flags __unused)
1457 {
1458 	int error;
1459 
1460 	TPL_LIST_RLOCK_ASSERT();
1461 	error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1462 	KASSERT(!error,
1463 	    ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1464 
1465 	if (!error) {
1466 		memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1467 		sb->created = sb->lastrst = stats_sbinuptime();
1468 		sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1469 	}
1470 
1471 	return (error);
1472 }
1473 
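/*
 * Grow *sbpp to accommodate the requested additional voi, voistat and
 * voistatdata bytes, reallocating the blob if necessary (which may move it),
 * opening correctly placed gaps in each region and fixing up all offsets.
 */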
1474 static int
1475 stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
1476     int newvoistatbytes, int newvoistatdatabytes)
1477 {
1478 	struct statsblobv1 *sb;
1479 	struct voi *tmpvoi;
1480 	struct voistat *tmpvoistat, *voistat_array;
1481 	int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;
1482 
1483 	KASSERT(newvoibytes % sizeof(struct voi) == 0,
1484 	    ("Bad newvoibytes %d", newvoibytes));
1485 	KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
1486 	    ("Bad newvoistatbytes %d", newvoistatbytes));
1487 
1488 	error = ((newvoibytes % sizeof(struct voi) == 0) &&
1489 	    (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
1490 	sb = *sbpp;
1491 	nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;
1492 
1493 	/*
1494 	 * XXXLAS: Required until we gain support for flags which alter the
1495 	 * units of size/offset fields in key structs.
1496 	 */
1497 	if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
1498 		error = EFBIG;
1499 
1500 	if (!error && (sb->cursz + nbytes > sb->maxsz)) {
1501 		/* Need to expand our blob. */
1502 		sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
1503 		if (sb != NULL) {
1504 			sb->maxsz = sb->cursz + nbytes;
1505 			*sbpp = sb;
1506 		} else
1507 			error = ENOMEM;
1508 	}
1509 
1510 	if (!error) {
1511 		/*
1512 		 * Shuffle memory within the expanded blob working from the end
1513 		 * backwards, leaving gaps for the new voistat and voistatdata
1514 		 * structs at the beginning of their respective blob regions,
1515 		 * and for the new voi structs at the end of their blob region.
1516 		 */
1517 		memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
1518 		    BLOB_OFFSET(sb, sb->statsdata_off),
1519 		    sb->cursz - sb->statsdata_off);
1520 		memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
1521 		    newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
1522 		    sb->statsdata_off - sb->stats_off);
1523 
1524 		/* First index of new voi/voistat structs to be initialised. */
1525 		idxnewvois = NVOIS(sb);
1526 		idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;
1527 
1528 		/* Update housekeeping variables and offsets. */
1529 		sb->cursz += nbytes;
1530 		sb->stats_off += newvoibytes;
1531 		sb->statsdata_off += newvoibytes + newvoistatbytes;
1532 
1533 		/* XXXLAS: Zeroing not strictly needed but aids debugging. */
1534 		memset(&sb->vois[idxnewvois], '\0', newvoibytes);
1535 		memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
1536 		    newvoistatbytes);
1537 		memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
1538 		    newvoistatdatabytes);
1539 
1540 		/* Initialise new voi array members and update offsets. */
1541 		for (i = 0; i < NVOIS(sb); i++) {
1542 			tmpvoi = &sb->vois[i];
1543 			if (i >= idxnewvois) {
1544 				tmpvoi->id = tmpvoi->voistatmaxid = -1;
1545 			} else if (tmpvoi->id > -1) {
1546 				tmpvoi->stats_off += newvoibytes +
1547 				    newvoistatbytes;
1548 			}
1549 		}
1550 
1551 		/* Initialise new voistat array members and update offsets. */
1552 		nvoistats = (sb->statsdata_off - sb->stats_off) /
1553 		    sizeof(struct voistat);
1554 		voistat_array = BLOB_OFFSET(sb, sb->stats_off);
1555 		for (i = 0; i < nvoistats; i++) {
1556 			tmpvoistat = &voistat_array[i];
1557 			if (i <= idxnewvoistats) {
1558 				tmpvoistat->stype = -1;
1559 			} else if (tmpvoistat->stype > -1) {
1560 				tmpvoistat->data_off += nbytes;
1561 			}
1562 		}
1563 	}
1564 
1565 	return (error);
1566 }
1567 
1568 static void
1569 stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
1570 {
1571 
1572 	/* XXXLAS: Fill this in. */
1573 }
1574 
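/*
 * Walk every voi/voistat pair in the blob, invoking "icb" for each and
 * maintaining the SB_IT_* first/last flags in the iterator context. NULL
 * vois/voistats are skipped unless the corresponding SB_IT_NULL* flag is
 * passed. A non-zero return from the callback terminates the iteration.
 */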
1575 static void
1576 stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
1577     void *usrctx, uint32_t flags)
1578 {
1579 	struct voi *v;
1580 	struct voistat *vs;
1581 	struct sb_iter_ctx ctx;
1582 	int i, j, firstvoi;
1583 
1584 	ctx.usrctx = usrctx;
1585 	ctx.flags |= SB_IT_FIRST_CB;
1586 	ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
1587 	    SB_IT_LAST_VOISTAT);
1588 	firstvoi = 1;
1589 
1590 	for (i = 0; i < NVOIS(sb); i++) {
1591 		v = &sb->vois[i];
1592 		ctx.vslot = i;
1593 		ctx.vsslot = -1;
1594 		ctx.flags |= SB_IT_FIRST_VOISTAT;
1595 
1596 		if (firstvoi)
1597 			ctx.flags |= SB_IT_FIRST_VOI;
1598 		else if (i == (NVOIS(sb) - 1))
1599 			ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;
1600 
1601 		if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
1602 			if (icb(sb, v, NULL, &ctx))
1603 				return;
1604 			firstvoi = 0;
1605 			ctx.flags &= ~SB_IT_FIRST_CB;
1606 		}
1607 
1608 		/* If NULL voi, v->voistatmaxid == -1 */
1609 		for (j = 0; j <= v->voistatmaxid; j++) {
1610 			vs = &((struct voistat *)BLOB_OFFSET(sb,
1611 			    v->stats_off))[j];
1612 			if (vs->stype < 0 &&
1613 			    !(flags & SB_IT_NULLVOISTAT))
1614 				continue;
1615 
1616 			if (j == v->voistatmaxid) {
1617 				ctx.flags |= SB_IT_LAST_VOISTAT;
1618 				if (i == (NVOIS(sb) - 1))
1619 					ctx.flags |=
1620 					    SB_IT_LAST_CB;
1621 			} else
1622 				ctx.flags &= ~SB_IT_LAST_CB;
1623 
1624 			ctx.vsslot = j;
1625 			if (icb(sb, v, vs, &ctx))
1626 				return;
1627 
1628 			ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
1629 			    SB_IT_LAST_VOISTAT);
1630 		}
1631 		ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
1632 	}
1633 }
1634 
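/*
 * Render a t-digest's summary counters and centroid list to "buf" in either
 * freeform or JSON format. In objdump mode centroids are emitted in array
 * order; otherwise they are walked in ascending mu order.
 */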
1635 static inline void
1636 stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
1637     const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
1638     size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1639 {
1640 	const struct ctdth32 *ctd32tree;
1641 	const struct ctdth64 *ctd64tree;
1642 	const struct voistatdata_tdgstctd32 *ctd32;
1643 	const struct voistatdata_tdgstctd64 *ctd64;
1644 	const char *fmtstr;
1645 	uint64_t smplcnt, compcnt;
1646 	int is32bit, qmaxstrlen;
1647 	uint16_t maxctds, curctds;
1648 
1649 	switch (tdgst_dtype) {
1650 	case VSD_DTYPE_TDGSTCLUST32:
1651 		smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
1652 		compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
1653 		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1654 		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1655 		ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
1656 		ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
1657 		    ARB_CMIN(ctdth32, ctd32tree));
1658 		qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
1659 		is32bit = 1;
1660 		ctd64tree = NULL;
1661 		ctd64 = NULL;
1662 		break;
1663 	case VSD_DTYPE_TDGSTCLUST64:
1664 		smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
1665 		compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
1666 		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1667 		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1668 		ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
1669 		ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
1670 		    ARB_CMIN(ctdth64, ctd64tree));
1671 		qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
1672 		is32bit = 0;
1673 		ctd32tree = NULL;
1674 		ctd32 = NULL;
1675 		break;
1676 	default:
1677 		return;
1678 	}
1679 
1680 	switch (fmt) {
1681 	case SB_STRFMT_FREEFORM:
1682 		fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
1683 		break;
1684 	case SB_STRFMT_JSON:
1685 	default:
1686 		fmtstr =
1687 		    "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
1688 		    "\"nctds\":%hu,\"ctds\":[";
1689 		break;
1690 	}
1691 	sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
1692 	    maxctds, curctds);
1693 
1694 	while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
1695 		char qstr[qmaxstrlen];
1696 
1697 		switch (fmt) {
1698 		case SB_STRFMT_FREEFORM:
1699 			fmtstr = "\n\t\t\t\t";
1700 			break;
1701 		case SB_STRFMT_JSON:
1702 		default:
1703 			fmtstr = "{";
1704 			break;
1705 		}
1706 		sbuf_cat(buf, fmtstr);
1707 
1708 		if (objdump) {
1709 			switch (fmt) {
1710 			case SB_STRFMT_FREEFORM:
1711 				fmtstr = "ctd[%hu].";
1712 				break;
1713 			case SB_STRFMT_JSON:
1714 			default:
1715 				fmtstr = "\"ctd\":%hu,";
1716 				break;
1717 			}
1718 			sbuf_printf(buf, fmtstr, is32bit ?
1719 			    ARB_SELFIDX(ctd32tree, ctd32) :
1720 			    ARB_SELFIDX(ctd64tree, ctd64));
1721 		}
1722 
1723 		switch (fmt) {
1724 		case SB_STRFMT_FREEFORM:
1725 			fmtstr = "{mu=";
1726 			break;
1727 		case SB_STRFMT_JSON:
1728 		default:
1729 			fmtstr = "\"mu\":";
1730 			break;
1731 		}
1732 		sbuf_cat(buf, fmtstr);
1733 		Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
1734 		    sizeof(qstr));
1735 		sbuf_cat(buf, qstr);
1736 
1737 
1738 		switch (fmt) {
1739 		case SB_STRFMT_FREEFORM:
1740 			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1741 			break;
1742 		case SB_STRFMT_JSON:
1743 		default:
1744 			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1745 			break;
1746 		}
1747 		sbuf_printf(buf, fmtstr,
1748 		    is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);
1749 
1750 		if (is32bit)
1751 			ctd32 = (objdump ? ARB_CNODE(ctd32tree,
1752 			    ARB_SELFIDX(ctd32tree, ctd32) + 1) :
1753 			    ARB_CNEXT(ctdth32, ctd32tree, ctd32));
1754 		else
1755 			ctd64 = (objdump ? ARB_CNODE(ctd64tree,
1756 			    ARB_SELFIDX(ctd64tree, ctd64) + 1) :
1757 			    ARB_CNEXT(ctdth64, ctd64tree, ctd64));
1758 
1759 		if (fmt == SB_STRFMT_JSON &&
1760 		    (is32bit ? NULL != ctd32 : NULL != ctd64))
1761 			sbuf_putc(buf, ',');
1762 	}
1763 	if (fmt == SB_STRFMT_JSON)
1764 		sbuf_cat(buf, "]");
1765 }
1766 
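/*
 * Render a histogram's bucket count, out-of-band counter and per-bucket
 * bounds to "buf" in either freeform or JSON format. CRHIST buckets take
 * their upper bound from the next bucket's lower bound (or the data type
 * maximum for the last bucket).
 */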
1767 static inline void
1768 stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
1769     const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
1770     size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1771 {
1772 	const struct voistatdata_numeric *bkt_lb, *bkt_ub;
1773 	const char *fmtstr;
1774 	int is32bit;
1775 	uint16_t i, nbkts;
1776 
1777 	switch (hist_dtype) {
1778 	case VSD_DTYPE_CRHIST32:
1779 		nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
1780 		is32bit = 1;
1781 		break;
1782 	case VSD_DTYPE_DRHIST32:
1783 		nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
1784 		is32bit = 1;
1785 		break;
1786 	case VSD_DTYPE_DVHIST32:
1787 		nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
1788 		is32bit = 1;
1789 		break;
1790 	case VSD_DTYPE_CRHIST64:
1791 		nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
1792 		is32bit = 0;
1793 		break;
1794 	case VSD_DTYPE_DRHIST64:
1795 		nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
1796 		is32bit = 0;
1797 		break;
1798 	case VSD_DTYPE_DVHIST64:
1799 		nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
1800 		is32bit = 0;
1801 		break;
1802 	default:
1803 		return;
1804 	}
1805 
1806 	switch (fmt) {
1807 	case SB_STRFMT_FREEFORM:
1808 		fmtstr = "nbkts=%hu, ";
1809 		break;
1810 	case SB_STRFMT_JSON:
1811 	default:
1812 		fmtstr = "\"nbkts\":%hu,";
1813 		break;
1814 	}
1815 	sbuf_printf(buf, fmtstr, nbkts);
1816 
1817 	switch (fmt) {
1818 	case SB_STRFMT_FREEFORM:
1819 		fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
1820 		break;
1821 	case SB_STRFMT_JSON:
1822 	default:
1823 		fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
1824 		    "\"oob\":%ju,\"bkts\":[");
1825 		break;
1826 	}
1827 	sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
1828 	    hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
1829 	    hist_dtype, oob));
1830 
1831 	for (i = 0; i < nbkts; i++) {
1832 		switch (hist_dtype) {
1833 		case VSD_DTYPE_CRHIST32:
1834 		case VSD_DTYPE_CRHIST64:
1835 			bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
1836 			    bkts[i].lb);
1837 			if (i < nbkts - 1)
1838 				bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
1839 				    hist_dtype, bkts[i + 1].lb);
1840 			else
1841 				bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
1842 			break;
1843 		case VSD_DTYPE_DRHIST32:
1844 		case VSD_DTYPE_DRHIST64:
1845 			bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1846 			    bkts[i].lb);
1847 			bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1848 			    bkts[i].ub);
1849 			break;
1850 		case VSD_DTYPE_DVHIST32:
1851 		case VSD_DTYPE_DVHIST64:
1852 			bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
1853 			    hist_dtype, bkts[i].val);
1854 			break;
1855 		default:
1856 			break;
1857 		}
1858 
1859 		switch (fmt) {
1860 		case SB_STRFMT_FREEFORM:
1861 			fmtstr = "\n\t\t\t\t";
1862 			break;
1863 		case SB_STRFMT_JSON:
1864 		default:
1865 			fmtstr = "{";
1866 			break;
1867 		}
1868 		sbuf_cat(buf, fmtstr);
1869 
1870 		if (objdump) {
1871 			switch (fmt) {
1872 			case SB_STRFMT_FREEFORM:
1873 				fmtstr = "bkt[%hu].";
1874 				break;
1875 			case SB_STRFMT_JSON:
1876 			default:
1877 				fmtstr = "\"bkt\":%hu,";
1878 				break;
1879 			}
1880 			sbuf_printf(buf, fmtstr, i);
1881 		}
1882 
1883 		switch (fmt) {
1884 		case SB_STRFMT_FREEFORM:
1885 			fmtstr = "{lb=";
1886 			break;
1887 		case SB_STRFMT_JSON:
1888 		default:
1889 			fmtstr = "\"lb\":";
1890 			break;
1891 		}
1892 		sbuf_cat(buf, fmtstr);
1893 		stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
1894 		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1895 		    fmt, buf, objdump);
1896 
1897 		switch (fmt) {
1898 		case SB_STRFMT_FREEFORM:
1899 			fmtstr = ",ub=";
1900 			break;
1901 		case SB_STRFMT_JSON:
1902 		default:
1903 			fmtstr = ",\"ub\":";
1904 			break;
1905 		}
1906 		sbuf_cat(buf, fmtstr);
1907 		stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
1908 		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1909 		    fmt, buf, objdump);
1910 
1911 		switch (fmt) {
1912 		case SB_STRFMT_FREEFORM:
1913 			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1914 			break;
1915 		case SB_STRFMT_JSON:
1916 		default:
1917 			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1918 			break;
1919 		}
1920 		sbuf_printf(buf, fmtstr, is32bit ?
1921 		    VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
1922 		    (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
1923 		    bkts[i].cnt));
1924 
1925 		if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
1926 			sbuf_putc(buf, ',');
1927 	}
1928 	if (fmt == SB_STRFMT_JSON)
1929 		sbuf_cat(buf, "]");
1930 }
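/*
 * Note on the JSON form produced above (values illustrative only): a histogram
 * renders as "nbkts":<n>,"oob":<n>,"bkts":[{"lb":<v>,"ub":<v>,"cnt":<n>},...],
 * with a per-bucket "bkt":<idx> key prepended to each element when dumping
 * (objdump != 0).
 */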
1931 
1932 int
1933 stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
1934     enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
1935     struct sbuf *buf, int objdump)
1936 {
1937 	const char *fmtstr;
1938 
1939 	if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
1940 	    vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
1941 		return (EINVAL);
1942 
1943 	switch (vsd_dtype) {
1944 	case VSD_DTYPE_VOISTATE:
1945 		switch (fmt) {
1946 		case SB_STRFMT_FREEFORM:
1947 			fmtstr = "prev=";
1948 			break;
1949 		case SB_STRFMT_JSON:
1950 		default:
1951 			fmtstr = "\"prev\":";
1952 			break;
1953 		}
1954 		sbuf_cat(buf, fmtstr);
1955 		/*
1956 		 * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
1957 		 */
1958 		stats_voistatdata_tostr(
1959 		    (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
1960 		    voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
1961 		break;
1962 	case VSD_DTYPE_INT_S32:
1963 		sbuf_printf(buf, "%d", vsd->int32.s32);
1964 		break;
1965 	case VSD_DTYPE_INT_U32:
1966 		sbuf_printf(buf, "%u", vsd->int32.u32);
1967 		break;
1968 	case VSD_DTYPE_INT_S64:
1969 		sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
1970 		break;
1971 	case VSD_DTYPE_INT_U64:
1972 		sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
1973 		break;
1974 	case VSD_DTYPE_INT_SLONG:
1975 		sbuf_printf(buf, "%ld", vsd->intlong.slong);
1976 		break;
1977 	case VSD_DTYPE_INT_ULONG:
1978 		sbuf_printf(buf, "%lu", vsd->intlong.ulong);
1979 		break;
1980 	case VSD_DTYPE_Q_S32:
1981 		{
1982 		char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
1983 		Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
1984 		sbuf_cat(buf, qstr);
1985 		}
1986 		break;
1987 	case VSD_DTYPE_Q_U32:
1988 		{
1989 		char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
1990 		Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
1991 		sbuf_cat(buf, qstr);
1992 		}
1993 		break;
1994 	case VSD_DTYPE_Q_S64:
1995 		{
1996 		char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
1997 		Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
1998 		sbuf_cat(buf, qstr);
1999 		}
2000 		break;
2001 	case VSD_DTYPE_Q_U64:
2002 		{
2003 		char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
2004 		Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
2005 		sbuf_cat(buf, qstr);
2006 		}
2007 		break;
2008 	case VSD_DTYPE_CRHIST32:
2009 	case VSD_DTYPE_DRHIST32:
2010 	case VSD_DTYPE_DVHIST32:
2011 	case VSD_DTYPE_CRHIST64:
2012 	case VSD_DTYPE_DRHIST64:
2013 	case VSD_DTYPE_DVHIST64:
2014 		stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
2015 		    vsd_dtype, vsd_sz, fmt, buf, objdump);
2016 		break;
2017 	case VSD_DTYPE_TDGSTCLUST32:
2018 	case VSD_DTYPE_TDGSTCLUST64:
2019 		stats_voistatdata_tdgst_tostr(voi_dtype,
2020 		    CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
2021 		    objdump);
2022 		break;
2023 	default:
2024 		break;
2025 	}
2026 
2027 	return (sbuf_error(buf));
2028 }
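/*
 * Illustrative sketch (not compiled) of a direct call to
 * stats_voistatdata_tostr() above for a simple numeric VSD; "buf" is assumed
 * to be an initialised sbuf:
 *
 *	struct voistatdata vsd;
 *
 *	vsd.int32.u32 = 42;
 *	stats_voistatdata_tostr(&vsd, VSD_DTYPE_INT_U32, VSD_DTYPE_INT_U32,
 *	    sizeof(vsd.int32), SB_STRFMT_JSON, buf, 0);
 *
 * which appends "42" to buf per the VSD_DTYPE_INT_U32 case.
 */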
2029 
2030 static void
2031 stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
2032     struct voistat *vs, struct sb_iter_ctx *ctx)
2033 {
2034 	struct sb_tostrcb_ctx *sctx;
2035 	struct metablob *tpl_mb;
2036 	struct sbuf *buf;
2037 	void *vsd;
2038 	uint8_t dump;
2039 
2040 	sctx = ctx->usrctx;
2041 	buf = sctx->buf;
2042 	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2043 	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2044 
2045 	if (ctx->flags & SB_IT_FIRST_CB) {
2046 		sbuf_printf(buf, "struct statsblobv1@%p", sb);
2047 		if (dump) {
2048 			sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
2049 			    "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
2050 			    "stats_off=%hu, statsdata_off=%hu",
2051 			    sb->abi, sb->endian, sb->maxsz, sb->cursz,
2052 			    sb->created, sb->lastrst, sb->flags, sb->stats_off,
2053 			    sb->statsdata_off);
2054 		}
2055 		sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
2056 	}
2057 
2058 	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2059 		sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
2060 		if (v->id < 0)
2061 			return;
2062 		sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
2063 		    tpl_mb->voi_meta[v->id].name);
2064 		if (dump)
2065 			sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
2066 			    "voistatmaxid=%hhd, stats_off=%hu", v->flags,
2067 			    vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
2068 	}
2069 
2070 	if (!dump && vs->stype <= 0)
2071 		return;
2072 
2073 	sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
2074 	if (vs->stype < 0) {
2075 		sbuf_printf(buf, "%hhd", vs->stype);
2076 		return;
2077 	} else
2078 		sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
2079 		    vs->errs);
2080 	vsd = BLOB_OFFSET(sb, vs->data_off);
2081 	if (dump)
2082 		sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
2083 		    "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
2084 		    vs->dsz, vs->data_off);
2085 
2086 	sbuf_printf(buf, "\n\t\t\tvoistatdata: ");
2087 	stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2088 	    sctx->fmt, buf, dump);
2089 }
2090 
2091 static void
2092 stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v,
2093     struct voistat *vs, struct sb_iter_ctx *ctx)
2094 {
2095 	struct sb_tostrcb_ctx *sctx;
2096 	struct metablob *tpl_mb;
2097 	struct sbuf *buf;
2098 	const char *fmtstr;
2099 	void *vsd;
2100 	uint8_t dump;
2101 
2102 	sctx = ctx->usrctx;
2103 	buf = sctx->buf;
2104 	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2105 	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2106 
2107 	if (ctx->flags & SB_IT_FIRST_CB) {
2108 		sbuf_putc(buf, '{');
2109 		if (dump) {
2110 			sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
2111 			    "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
2112 			    "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
2113 			    "\"statsdata_off\":%hu,", sb->abi,
2114 			    sb->endian, sb->maxsz, sb->cursz, sb->created,
2115 			    sb->lastrst, sb->flags, sb->stats_off,
2116 			    sb->statsdata_off);
2117 		}
2118 
2119 		if (tpl_mb == NULL)
2120 			fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
2121 		else
2122 			fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";
2123 
2124 		sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
2125 		    sb->tplhash);
2126 	}
2127 
2128 	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2129 		if (dump) {
2130 			sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
2131 			    v->id);
2132 			if (v->id < 0) {
2133 				sbuf_printf(buf, "},");
2134 				return;
2135 			}
2136 
2137 			if (tpl_mb == NULL)
2138 				fmtstr = ",\"name\":%s,\"flags\":%hu,"
2139 				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2140 				    "\"stats_off\":%hu,";
2141 			else
2142 				fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
2143 				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2144 				    "\"stats_off\":%hu,";
2145 
2146 			sbuf_printf(buf, fmtstr, tpl_mb ?
2147 			    tpl_mb->voi_meta[v->id].name : "null", v->flags,
2148 			    vsd_dtype2name[v->dtype], v->voistatmaxid,
2149 			    v->stats_off);
2150 		} else {
2151 			if (tpl_mb == NULL) {
2152 				sbuf_printf(buf, "\"[%hd]\":{", v->id);
2153 			} else {
2154 				sbuf_printf(buf, "\"%s\":{",
2155 				    tpl_mb->voi_meta[v->id].name);
2156 			}
2157 		}
2158 		sbuf_cat(buf, "\"stats\":{");
2159 	}
2160 
2161 	vsd = BLOB_OFFSET(sb, vs->data_off);
2162 	if (dump) {
2163 		sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
2164 		if (vs->stype < 0) {
2165 			sbuf_printf(buf, "{\"stype\":-1},");
2166 			return;
2167 		}
2168 		sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
2169 		    "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
2170 		    vs_stype2name[vs->stype], vs->errs, vs->flags,
2171 		    vsd_dtype2name[vs->dtype], vs->data_off);
2172 	} else if (vs->stype > 0) {
2173 		if (tpl_mb == NULL)
2174 			sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
2175 		else
2176 			sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
2177 	} else
2178 		return;
2179 
2180 	if ((vs->flags & VS_VSDVALID) || dump) {
2181 		if (!dump)
2182 			sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
2183 		/* Simple non-compound VSD types need a key. */
2184 		if (!vsd_compoundtype[vs->dtype])
2185 			sbuf_cat(buf, "\"val\":");
2186 		stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2187 		    sctx->fmt, buf, dump);
2188 		sbuf_cat(buf, dump ? "}}" : "}");
2189 	} else
2190 		sbuf_cat(buf, dump ? "null}" : "null");
2191 
2192 	if (ctx->flags & SB_IT_LAST_VOISTAT)
2193 		sbuf_cat(buf, "}}");
2194 
2195 	if (ctx->flags & SB_IT_LAST_CB)
2196 		sbuf_cat(buf, "}}");
2197 	else
2198 		sbuf_putc(buf, ',');
2199 }
2200 
2201 static int
2202 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2203     struct sb_iter_ctx *ctx)
2204 {
2205 	struct sb_tostrcb_ctx *sctx;
2206 
2207 	sctx = ctx->usrctx;
2208 
2209 	switch (sctx->fmt) {
2210 	case SB_STRFMT_FREEFORM:
2211 		stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2212 		break;
2213 	case SB_STRFMT_JSON:
2214 		stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2215 		break;
2216 	default:
2217 		break;
2218 	}
2219 
2220 	return (sbuf_error(sctx->buf));
2221 }
2222 
2223 int
2224 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2225     enum sb_str_fmt fmt, uint32_t flags)
2226 {
2227 	struct sb_tostrcb_ctx sctx;
2228 	uint32_t iflags;
2229 
2230 	if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2231 	    fmt >= SB_STRFMT_NUM_FMTS)
2232 		return (EINVAL);
2233 
2234 	sctx.buf = buf;
2235 	sctx.fmt = fmt;
2236 	sctx.flags = flags;
2237 
2238 	if (flags & SB_TOSTR_META) {
2239 		if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2240 		    &sctx.tpl))
2241 			return (EINVAL);
2242 	} else
2243 		sctx.tpl = NULL;
2244 
2245 	iflags = 0;
2246 	if (flags & SB_TOSTR_OBJDUMP)
2247 		iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2248 	stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2249 
2250 	return (sbuf_error(buf));
2251 }
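/*
 * Illustrative sketch (not compiled) of rendering a blob to JSON with
 * stats_v1_blob_tostr() above, assuming "sb" points at a valid, finalised v1
 * blob whose template is registered:
 *
 *	struct sbuf *s;
 *
 *	s = sbuf_new_auto();
 *	if (stats_v1_blob_tostr(sb, s, SB_STRFMT_JSON, SB_TOSTR_META) == 0) {
 *		sbuf_finish(s);
 *		printf("%s\n", sbuf_data(s));
 *	}
 *	sbuf_delete(s);
 */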
2252 
2253 static int
2254 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2255     struct voistat *vs, struct sb_iter_ctx *ctx)
2256 {
2257 	struct sb_visitcb_ctx *vctx;
2258 	struct sb_visit sbv;
2259 
2260 	vctx = ctx->usrctx;
2261 
2262 	sbv.tplhash = sb->tplhash;
2263 	sbv.voi_id = v->id;
2264 	sbv.voi_dtype = v->dtype;
2265 	sbv.vs_stype = vs->stype;
2266 	sbv.vs_dtype = vs->dtype;
2267 	sbv.vs_dsz = vs->dsz;
2268 	sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2269 	sbv.vs_errs = vs->errs;
2270 	sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2271 	    SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2272 	    SB_IT_LAST_VOISTAT);
2273 
2274 	return (vctx->cb(&sbv, vctx->usrctx));
2275 }
2276 
2277 int
2278 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2279     void *usrctx)
2280 {
2281 	struct sb_visitcb_ctx vctx;
2282 
2283 	if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2284 		return (EINVAL);
2285 
2286 	vctx.cb = func;
2287 	vctx.usrctx = usrctx;
2288 
2289 	stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2290 
2291 	return (0);
2292 }
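/*
 * Illustrative sketch (not compiled) of a visitor for stats_v1_blob_visit()
 * above; "my_visitcb" and "nsums" are hypothetical names, and the callback
 * signature shown is as implied by the vctx->cb() call in
 * stats_v1_itercb_visit():
 *
 *	static int
 *	my_visitcb(struct sb_visit *sbv, void *usrctx)
 *	{
 *		uint64_t *nsums = usrctx;
 *
 *		if (sbv->vs_stype == VS_STYPE_SUM)
 *			(*nsums)++;
 *		return (0);
 *	}
 *
 *	uint64_t nsums = 0;
 *	stats_v1_blob_visit(sb, my_visitcb, &nsums);
 */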
2293 
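/*
 * Iterator callback which resets an individual voistat to its "no data"
 * state: sums to zero, maxes to the data type's minimum, mins to the data
 * type's maximum, histogram bucket/oob counts to zero, and t-digests to an
 * empty centroid tree. VS_STYPE_VOISTATE entries are left untouched.
 */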
2294 static int
2295 stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
2296     struct voistat *vs, struct sb_iter_ctx *ctx __unused)
2297 {
2298 	void *vsd;
2299 
2300 	if (vs->stype == VS_STYPE_VOISTATE)
2301 		return (0);
2302 
2303 	vsd = BLOB_OFFSET(sb, vs->data_off);
2304 
2305 	/* Perform the stat type's default reset action. */
2306 	switch (vs->stype) {
2307 	case VS_STYPE_SUM:
2308 		switch (vs->dtype) {
2309 		case VSD_DTYPE_Q_S32:
2310 			Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
2311 			break;
2312 		case VSD_DTYPE_Q_U32:
2313 			Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
2314 			break;
2315 		case VSD_DTYPE_Q_S64:
2316 			Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
2317 			break;
2318 		case VSD_DTYPE_Q_U64:
2319 			Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
2320 			break;
2321 		default:
2322 			bzero(vsd, vs->dsz);
2323 			break;
2324 		}
2325 		break;
2326 	case VS_STYPE_MAX:
2327 		switch (vs->dtype) {
2328 		case VSD_DTYPE_Q_S32:
2329 			Q_SIFVAL(VSD(q32, vsd)->sq32,
2330 			    Q_IFMINVAL(VSD(q32, vsd)->sq32));
2331 			break;
2332 		case VSD_DTYPE_Q_U32:
2333 			Q_SIFVAL(VSD(q32, vsd)->uq32,
2334 			    Q_IFMINVAL(VSD(q32, vsd)->uq32));
2335 			break;
2336 		case VSD_DTYPE_Q_S64:
2337 			Q_SIFVAL(VSD(q64, vsd)->sq64,
2338 			    Q_IFMINVAL(VSD(q64, vsd)->sq64));
2339 			break;
2340 		case VSD_DTYPE_Q_U64:
2341 			Q_SIFVAL(VSD(q64, vsd)->uq64,
2342 			    Q_IFMINVAL(VSD(q64, vsd)->uq64));
2343 			break;
2344 		default:
2345 			memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
2346 			    vs->dsz);
2347 			break;
2348 		}
2349 		break;
2350 	case VS_STYPE_MIN:
2351 		switch (vs->dtype) {
2352 		case VSD_DTYPE_Q_S32:
2353 			Q_SIFVAL(VSD(q32, vsd)->sq32,
2354 			    Q_IFMAXVAL(VSD(q32, vsd)->sq32));
2355 			break;
2356 		case VSD_DTYPE_Q_U32:
2357 			Q_SIFVAL(VSD(q32, vsd)->uq32,
2358 			    Q_IFMAXVAL(VSD(q32, vsd)->uq32));
2359 			break;
2360 		case VSD_DTYPE_Q_S64:
2361 			Q_SIFVAL(VSD(q64, vsd)->sq64,
2362 			    Q_IFMAXVAL(VSD(q64, vsd)->sq64));
2363 			break;
2364 		case VSD_DTYPE_Q_U64:
2365 			Q_SIFVAL(VSD(q64, vsd)->uq64,
2366 			    Q_IFMAXVAL(VSD(q64, vsd)->uq64));
2367 			break;
2368 		default:
2369 			memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
2370 			    vs->dsz);
2371 			break;
2372 		}
2373 		break;
2374 	case VS_STYPE_HIST:
2375 		{
2376 		/* Reset bucket counts. */
2377 		struct voistatdata_hist *hist;
2378 		int i, is32bit;
2379 		uint16_t nbkts;
2380 
2381 		hist = VSD(hist, vsd);
2382 		switch (vs->dtype) {
2383 		case VSD_DTYPE_CRHIST32:
2384 			nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2385 			is32bit = 1;
2386 			break;
2387 		case VSD_DTYPE_DRHIST32:
2388 			nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2389 			is32bit = 1;
2390 			break;
2391 		case VSD_DTYPE_DVHIST32:
2392 			nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2393 			is32bit = 1;
2394 			break;
2395 		case VSD_DTYPE_CRHIST64:
2396 			nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2397 			is32bit = 0;
2398 			break;
2399 		case VSD_DTYPE_DRHIST64:
2400 			nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2401 			is32bit = 0;
2402 			break;
2403 		case VSD_DTYPE_DVHIST64:
2404 			nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2405 			is32bit = 0;
2406 			break;
2407 		default:
2408 			return (0);
2409 		}
2410 
2411 		bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
2412 		    is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
2413 		for (i = nbkts - 1; i >= 0; i--) {
2414 			bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
2415 			    bkts[i].cnt), is32bit ? sizeof(uint32_t) :
2416 			    sizeof(uint64_t));
2417 		}
2418 		break;
2419 		}
2420 	case VS_STYPE_TDGST:
2421 		{
2422 		/* Reset sample count centroids array/tree. */
2423 		struct voistatdata_tdgst *tdgst;
2424 		struct ctdth32 *ctd32tree;
2425 		struct ctdth64 *ctd64tree;
2426 		struct voistatdata_tdgstctd32 *ctd32;
2427 		struct voistatdata_tdgstctd64 *ctd64;
2428 
2429 		tdgst = VSD(tdgst, vsd);
2430 		switch (vs->dtype) {
2431 		case VSD_DTYPE_TDGSTCLUST32:
2432 			VSD(tdgstclust32, tdgst)->smplcnt = 0;
2433 			VSD(tdgstclust32, tdgst)->compcnt = 0;
2434 			ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
2435 			ARB_INIT(ctd32, ctdlnk, ctd32tree,
2436 			    ARB_MAXNODES(ctd32tree)) {
2437 				ctd32->cnt = 0;
2438 				Q_SIFVAL(ctd32->mu, 0);
2439 			}
2440 #ifdef DIAGNOSTIC
2441 			RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
2442 #endif
2443 			break;
2444 		case VSD_DTYPE_TDGSTCLUST64:
2445 			VSD(tdgstclust64, tdgst)->smplcnt = 0;
2446 			VSD(tdgstclust64, tdgst)->compcnt = 0;
2447 			ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
2448 			ARB_INIT(ctd64, ctdlnk, ctd64tree,
2449 			    ARB_MAXNODES(ctd64tree)) {
2450 				ctd64->cnt = 0;
2451 				Q_SIFVAL(ctd64->mu, 0);
2452 			}
2453 #ifdef DIAGNOSTIC
2454 			RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
2455 #endif
2456 			break;
2457 		default:
2458 			return (0);
2459 		}
2460 		break;
2461 		}
2462 	default:
2463 		KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
2464 		break;
2465 	}
2466 
2467 	vs->errs = 0;
2468 	vs->flags &= ~VS_VSDVALID;
2469 
2470 	return (0);
2471 }
2472 
2473 int
2474 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2475     struct statsblobv1 *src, uint32_t flags)
2476 {
2477 	int error;
2478 
2479 	if (src != NULL && src->abi == STATS_ABI_V1) {
2480 		error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2481 		if (!error) {
2482 			if (flags & SB_CLONE_RSTSRC) {
2483 				stats_v1_blob_iter(src,
2484 				    stats_v1_icb_reset_voistat, NULL, 0);
2485 				src->lastrst = stats_sbinuptime();
2486 			}
2487 			stats_v1_blob_finalise(*dst);
2488 		}
2489 	} else
2490 		error = EINVAL;
2491 
2492 	return (error);
2493 }
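/*
 * Note: passing SB_CLONE_RSTSRC in flags to stats_v1_blob_snapshot() above
 * gives "copy then reset" semantics; the snapshot in *dst retains the
 * accumulated stats while src restarts from its reset state, with
 * src->lastrst recording when that reset happened.
 */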
2494 
2495 static inline int
2496 stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
2497     struct voistatdata *voival, struct voistat *vs, void *vsd)
2498 {
2499 	int error;
2500 
2501 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2502 	    ("Unknown VSD dtype %d", vs->dtype));
2503 
2504 	error = 0;
2505 
2506 	switch (vs->dtype) {
2507 	case VSD_DTYPE_INT_S32:
2508 		if (VSD(int32, vsd)->s32 < voival->int32.s32) {
2509 			VSD(int32, vsd)->s32 = voival->int32.s32;
2510 			vs->flags |= VS_VSDVALID;
2511 		}
2512 		break;
2513 	case VSD_DTYPE_INT_U32:
2514 		if (VSD(int32, vsd)->u32 < voival->int32.u32) {
2515 			VSD(int32, vsd)->u32 = voival->int32.u32;
2516 			vs->flags |= VS_VSDVALID;
2517 		}
2518 		break;
2519 	case VSD_DTYPE_INT_S64:
2520 		if (VSD(int64, vsd)->s64 < voival->int64.s64) {
2521 			VSD(int64, vsd)->s64 = voival->int64.s64;
2522 			vs->flags |= VS_VSDVALID;
2523 		}
2524 		break;
2525 	case VSD_DTYPE_INT_U64:
2526 		if (VSD(int64, vsd)->u64 < voival->int64.u64) {
2527 			VSD(int64, vsd)->u64 = voival->int64.u64;
2528 			vs->flags |= VS_VSDVALID;
2529 		}
2530 		break;
2531 	case VSD_DTYPE_INT_SLONG:
2532 		if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
2533 			VSD(intlong, vsd)->slong = voival->intlong.slong;
2534 			vs->flags |= VS_VSDVALID;
2535 		}
2536 		break;
2537 	case VSD_DTYPE_INT_ULONG:
2538 		if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
2539 			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2540 			vs->flags |= VS_VSDVALID;
2541 		}
2542 		break;
2543 	case VSD_DTYPE_Q_S32:
2544 		if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2545 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2546 		    voival->q32.sq32)))) {
2547 			vs->flags |= VS_VSDVALID;
2548 		}
2549 		break;
2550 	case VSD_DTYPE_Q_U32:
2551 		if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2552 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2553 		    voival->q32.uq32)))) {
2554 			vs->flags |= VS_VSDVALID;
2555 		}
2556 		break;
2557 	case VSD_DTYPE_Q_S64:
2558 		if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2559 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2560 		    voival->q64.sq64)))) {
2561 			vs->flags |= VS_VSDVALID;
2562 		}
2563 		break;
2564 	case VSD_DTYPE_Q_U64:
2565 		if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2566 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2567 		    voival->q64.uq64)))) {
2568 			vs->flags |= VS_VSDVALID;
2569 		}
2570 		break;
2571 	default:
2572 		error = EINVAL;
2573 		break;
2574 	}
2575 
2576 	return (error);
2577 }
2578 
2579 static inline int
2580 stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
2581     struct voistatdata *voival, struct voistat *vs, void *vsd)
2582 {
2583 	int error;
2584 
2585 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2586 	    ("Unknown VSD dtype %d", vs->dtype));
2587 
2588 	error = 0;
2589 
2590 	switch (vs->dtype) {
2591 	case VSD_DTYPE_INT_S32:
2592 		if (VSD(int32, vsd)->s32 > voival->int32.s32) {
2593 			VSD(int32, vsd)->s32 = voival->int32.s32;
2594 			vs->flags |= VS_VSDVALID;
2595 		}
2596 		break;
2597 	case VSD_DTYPE_INT_U32:
2598 		if (VSD(int32, vsd)->u32 > voival->int32.u32) {
2599 			VSD(int32, vsd)->u32 = voival->int32.u32;
2600 			vs->flags |= VS_VSDVALID;
2601 		}
2602 		break;
2603 	case VSD_DTYPE_INT_S64:
2604 		if (VSD(int64, vsd)->s64 > voival->int64.s64) {
2605 			VSD(int64, vsd)->s64 = voival->int64.s64;
2606 			vs->flags |= VS_VSDVALID;
2607 		}
2608 		break;
2609 	case VSD_DTYPE_INT_U64:
2610 		if (VSD(int64, vsd)->u64 > voival->int64.u64) {
2611 			VSD(int64, vsd)->u64 = voival->int64.u64;
2612 			vs->flags |= VS_VSDVALID;
2613 		}
2614 		break;
2615 	case VSD_DTYPE_INT_SLONG:
2616 		if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
2617 			VSD(intlong, vsd)->slong = voival->intlong.slong;
2618 			vs->flags |= VS_VSDVALID;
2619 		}
2620 		break;
2621 	case VSD_DTYPE_INT_ULONG:
2622 		if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
2623 			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2624 			vs->flags |= VS_VSDVALID;
2625 		}
2626 		break;
2627 	case VSD_DTYPE_Q_S32:
2628 		if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2629 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2630 		    voival->q32.sq32)))) {
2631 			vs->flags |= VS_VSDVALID;
2632 		}
2633 		break;
2634 	case VSD_DTYPE_Q_U32:
2635 		if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2636 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2637 		    voival->q32.uq32)))) {
2638 			vs->flags |= VS_VSDVALID;
2639 		}
2640 		break;
2641 	case VSD_DTYPE_Q_S64:
2642 		if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2643 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2644 		    voival->q64.sq64)))) {
2645 			vs->flags |= VS_VSDVALID;
2646 		}
2647 		break;
2648 	case VSD_DTYPE_Q_U64:
2649 		if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2650 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2651 		    voival->q64.uq64)))) {
2652 			vs->flags |= VS_VSDVALID;
2653 		}
2654 		break;
2655 	default:
2656 		error = EINVAL;
2657 		break;
2658 	}
2659 
2660 	return (error);
2661 }
2662 
2663 static inline int
2664 stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
2665     struct voistatdata *voival, struct voistat *vs, void *vsd)
2666 {
2667 	int error;
2668 
2669 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2670 	    ("Unknown VSD dtype %d", vs->dtype));
2671 
2672 	error = 0;
2673 
2674 	switch (vs->dtype) {
2675 	case VSD_DTYPE_INT_S32:
2676 		VSD(int32, vsd)->s32 += voival->int32.s32;
2677 		break;
2678 	case VSD_DTYPE_INT_U32:
2679 		VSD(int32, vsd)->u32 += voival->int32.u32;
2680 		break;
2681 	case VSD_DTYPE_INT_S64:
2682 		VSD(int64, vsd)->s64 += voival->int64.s64;
2683 		break;
2684 	case VSD_DTYPE_INT_U64:
2685 		VSD(int64, vsd)->u64 += voival->int64.u64;
2686 		break;
2687 	case VSD_DTYPE_INT_SLONG:
2688 		VSD(intlong, vsd)->slong += voival->intlong.slong;
2689 		break;
2690 	case VSD_DTYPE_INT_ULONG:
2691 		VSD(intlong, vsd)->ulong += voival->intlong.ulong;
2692 		break;
2693 	case VSD_DTYPE_Q_S32:
2694 		error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
2695 		break;
2696 	case VSD_DTYPE_Q_U32:
2697 		error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
2698 		break;
2699 	case VSD_DTYPE_Q_S64:
2700 		error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
2701 		break;
2702 	case VSD_DTYPE_Q_U64:
2703 		error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
2704 		break;
2705 	default:
2706 		error = EINVAL;
2707 		break;
2708 	}
2709 
2710 	if (!error)
2711 		vs->flags |= VS_VSDVALID;
2712 
2713 	return (error);
2714 }
2715 
2716 static inline int
2717 stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
2718     struct voistat *vs, struct voistatdata_hist *hist)
2719 {
2720 	struct voistatdata_numeric *bkt_lb, *bkt_ub;
2721 	uint64_t *oob64, *cnt64;
2722 	uint32_t *oob32, *cnt32;
2723 	int error, i, found, is32bit, has_ub, eq_only;
2724 
2725 	error = 0;
2726 
2727 	switch (vs->dtype) {
2728 	case VSD_DTYPE_CRHIST32:
2729 		i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2730 		is32bit = 1;
2731 		has_ub = eq_only = 0;
2732 		oob32 = &VSD(crhist32, hist)->oob;
2733 		break;
2734 	case VSD_DTYPE_DRHIST32:
2735 		i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2736 		is32bit = has_ub = 1;
2737 		eq_only = 0;
2738 		oob32 = &VSD(drhist32, hist)->oob;
2739 		break;
2740 	case VSD_DTYPE_DVHIST32:
2741 		i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2742 		is32bit = eq_only = 1;
2743 		has_ub = 0;
2744 		oob32 = &VSD(dvhist32, hist)->oob;
2745 		break;
2746 	case VSD_DTYPE_CRHIST64:
2747 		i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2748 		is32bit = has_ub = eq_only = 0;
2749 		oob64 = &VSD(crhist64, hist)->oob;
2750 		break;
2751 	case VSD_DTYPE_DRHIST64:
2752 		i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2753 		is32bit = eq_only = 0;
2754 		has_ub = 1;
2755 		oob64 = &VSD(drhist64, hist)->oob;
2756 		break;
2757 	case VSD_DTYPE_DVHIST64:
2758 		i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2759 		is32bit = has_ub = 0;
2760 		eq_only = 1;
2761 		oob64 = &VSD(dvhist64, hist)->oob;
2762 		break;
2763 	default:
2764 		return (EINVAL);
2765 	}
2766 	i--; /* Adjust for 0-based array index. */
2767 
2768 	/* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
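	/*
	 * Buckets are searched from the top down: CRHIST buckets span
	 * [lb, next bucket's lb), DRHIST buckets span [lb, ub) (has_ub), and
	 * DVHIST buckets match a single discrete value (eq_only). A value
	 * matching no bucket increments the out-of-band (oob) count instead.
	 */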
2769 	for (found = 0; i >= 0 && !found; i--) {
2770 		switch (vs->dtype) {
2771 		case VSD_DTYPE_CRHIST32:
2772 			bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
2773 			cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
2774 			break;
2775 		case VSD_DTYPE_DRHIST32:
2776 			bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
2777 			bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
2778 			cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
2779 			break;
2780 		case VSD_DTYPE_DVHIST32:
2781 			bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
2782 			cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
2783 			break;
2784 		case VSD_DTYPE_CRHIST64:
2785 			bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
2786 			cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
2787 			break;
2788 		case VSD_DTYPE_DRHIST64:
2789 			bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
2790 			bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
2791 			cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
2792 			break;
2793 		case VSD_DTYPE_DVHIST64:
2794 			bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
2795 			cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
2796 			break;
2797 		default:
2798 			return (EINVAL);
2799 		}
2800 
2801 		switch (voi_dtype) {
2802 		case VSD_DTYPE_INT_S32:
2803 			if (voival->int32.s32 >= bkt_lb->int32.s32) {
2804 				if ((eq_only && voival->int32.s32 ==
2805 				    bkt_lb->int32.s32) ||
2806 				    (!eq_only && (!has_ub ||
2807 				    voival->int32.s32 < bkt_ub->int32.s32)))
2808 					found = 1;
2809 			}
2810 			break;
2811 		case VSD_DTYPE_INT_U32:
2812 			if (voival->int32.u32 >= bkt_lb->int32.u32) {
2813 				if ((eq_only && voival->int32.u32 ==
2814 				    bkt_lb->int32.u32) ||
2815 				    (!eq_only && (!has_ub ||
2816 				    voival->int32.u32 < bkt_ub->int32.u32)))
2817 					found = 1;
2818 			}
2819 			break;
2820 		case VSD_DTYPE_INT_S64:
2821 			if (voival->int64.s64 >= bkt_lb->int64.s64)
2822 				if ((eq_only && voival->int64.s64 ==
2823 				    bkt_lb->int64.s64) ||
2824 				    (!eq_only && (!has_ub ||
2825 				    voival->int64.s64 < bkt_ub->int64.s64)))
2826 					found = 1;
2827 			break;
2828 		case VSD_DTYPE_INT_U64:
2829 			if (voival->int64.u64 >= bkt_lb->int64.u64)
2830 				if ((eq_only && voival->int64.u64 ==
2831 				    bkt_lb->int64.u64) ||
2832 				    (!eq_only && (!has_ub ||
2833 				    voival->int64.u64 < bkt_ub->int64.u64)))
2834 					found = 1;
2835 			break;
2836 		case VSD_DTYPE_INT_SLONG:
2837 			if (voival->intlong.slong >= bkt_lb->intlong.slong)
2838 				if ((eq_only && voival->intlong.slong ==
2839 				    bkt_lb->intlong.slong) ||
2840 				    (!eq_only && (!has_ub ||
2841 				    voival->intlong.slong <
2842 				    bkt_ub->intlong.slong)))
2843 					found = 1;
2844 			break;
2845 		case VSD_DTYPE_INT_ULONG:
2846 			if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
2847 				if ((eq_only && voival->intlong.ulong ==
2848 				    bkt_lb->intlong.ulong) ||
2849 				    (!eq_only && (!has_ub ||
2850 				    voival->intlong.ulong <
2851 				    bkt_ub->intlong.ulong)))
2852 					found = 1;
2853 			break;
2854 		case VSD_DTYPE_Q_S32:
2855 			if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
2856 				if ((eq_only && Q_QEQ(voival->q32.sq32,
2857 				    bkt_lb->q32.sq32)) ||
2858 				    (!eq_only && (!has_ub ||
2859 				    Q_QLTQ(voival->q32.sq32,
2860 				    bkt_ub->q32.sq32))))
2861 					found = 1;
2862 			break;
2863 		case VSD_DTYPE_Q_U32:
2864 			if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
2865 				if ((eq_only && Q_QEQ(voival->q32.uq32,
2866 				    bkt_lb->q32.uq32)) ||
2867 				    (!eq_only && (!has_ub ||
2868 				    Q_QLTQ(voival->q32.uq32,
2869 				    bkt_ub->q32.uq32))))
2870 					found = 1;
2871 			break;
2872 		case VSD_DTYPE_Q_S64:
2873 			if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
2874 				if ((eq_only && Q_QEQ(voival->q64.sq64,
2875 				    bkt_lb->q64.sq64)) ||
2876 				    (!eq_only && (!has_ub ||
2877 				    Q_QLTQ(voival->q64.sq64,
2878 				    bkt_ub->q64.sq64))))
2879 					found = 1;
2880 			break;
2881 		case VSD_DTYPE_Q_U64:
2882 			if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
2883 				if ((eq_only && Q_QEQ(voival->q64.uq64,
2884 				    bkt_lb->q64.uq64)) ||
2885 				    (!eq_only && (!has_ub ||
2886 				    Q_QLTQ(voival->q64.uq64,
2887 				    bkt_ub->q64.uq64))))
2888 					found = 1;
2889 			break;
2890 		default:
2891 			break;
2892 		}
2893 	}
2894 
2895 	if (found) {
2896 		if (is32bit)
2897 			*cnt32 += 1;
2898 		else
2899 			*cnt64 += 1;
2900 	} else {
2901 		if (is32bit)
2902 			*oob32 += 1;
2903 		else
2904 			*oob64 += 1;
2905 	}
2906 
2907 	vs->flags |= VS_VSDVALID;
2908 	return (error);
2909 }
2910 
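/*
 * Compress a full t-digest in place: reset its centroid ARB and re-insert
 * every previous centroid's mu as a sample weighted by its count, in
 * randomised order (see the comments below for how that ordering is derived).
 */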
2911 static inline int
2912 stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
2913     struct voistatdata_tdgst *tdgst, int attempt)
2914 {
2915 	struct ctdth32 *ctd32tree;
2916 	struct ctdth64 *ctd64tree;
2917 	struct voistatdata_tdgstctd32 *ctd32;
2918 	struct voistatdata_tdgstctd64 *ctd64;
2919 	uint64_t ebits, idxmask;
2920 	uint32_t bitsperidx, nebits;
2921 	int error, idx, is32bit, maxctds, remctds, tmperr;
2922 
2923 	error = 0;
2924 
2925 	switch (vs_dtype) {
2926 	case VSD_DTYPE_TDGSTCLUST32:
2927 		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
2928 		if (!ARB_FULL(ctd32tree))
2929 			return (0);
2930 		VSD(tdgstclust32, tdgst)->compcnt++;
2931 		maxctds = remctds = ARB_MAXNODES(ctd32tree);
2932 		ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
2933 		VSD(tdgstclust32, tdgst)->smplcnt = 0;
2934 		is32bit = 1;
2935 		ctd64tree = NULL;
2936 		ctd64 = NULL;
2937 #ifdef DIAGNOSTIC
2938 		RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
2939 #endif
2940 		break;
2941 	case VSD_DTYPE_TDGSTCLUST64:
2942 		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
2943 		if (!ARB_FULL(ctd64tree))
2944 			return (0);
2945 		VSD(tdgstclust64, tdgst)->compcnt++;
2946 		maxctds = remctds = ARB_MAXNODES(ctd64tree);
2947 		ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
2948 		VSD(tdgstclust64, tdgst)->smplcnt = 0;
2949 		is32bit = 0;
2950 		ctd32tree = NULL;
2951 		ctd32 = NULL;
2952 #ifdef DIAGNOSTIC
2953 		RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
2954 #endif
2955 		break;
2956 	default:
2957 		return (EINVAL);
2958 	}
2959 
2960 	/*
2961 	 * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
2962 	 * re-inserting the mu/cnt of each as a value and corresponding weight.
2963 	 */
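	/*
	 * Worked example (numbers illustrative only): with maxctds == 100,
	 * bitsperidx = fls(100) = 7, so each 31-bit random(3) value is
	 * accumulated into ebits and 7-bit index candidates are peeled off
	 * it; candidates >= maxctds are folded back into range by the modulo
	 * in the do-while loop below.
	 */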
2964 
2965 #define	bitsperrand 31 /* Per random(3). */
2966 	ebits = 0;
2967 	nebits = 0;
2968 	bitsperidx = fls(maxctds);
2969 	KASSERT(bitsperidx <= sizeof(ebits) << 3,
2970 	    ("%s: bitsperidx=%d, ebits=%d",
2971 	    __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
2972 	idxmask = (UINT64_C(1) << bitsperidx) - 1;
2973 	srandom(stats_sbinuptime());
2974 
2975 	/* Initialise the free list with randomised centroid indices. */
2976 	for (; remctds > 0; remctds--) {
2977 		while (nebits < bitsperidx) {
2978 			ebits |= ((uint64_t)random()) << nebits;
2979 			nebits += bitsperrand;
2980 			if (nebits > (sizeof(ebits) << 3))
2981 				nebits = sizeof(ebits) << 3;
2982 		}
2983 		idx = ebits & idxmask;
2984 		nebits -= bitsperidx;
2985 		ebits >>= bitsperidx;
2986 
2987 		/*
2988 		 * Select the next centroid to put on the ARB free list. We
2989 		 * start at our randomly selected array index and work forwards
2990 		 * until finding one not already on the free list (which reduces
2991 		 * re-insertion randomness, but is good enough).
2992 		 */
2993 		do {
2994 			if (idx >= maxctds)
2995 				idx %= maxctds;
2996 
2997 			if (is32bit)
2998 				ctd32 = ARB_NODE(ctd32tree, idx);
2999 			else
3000 				ctd64 = ARB_NODE(ctd64tree, idx);
3001 		} while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
3002 		    ARB_ISFREE(ctd64, ctdlnk)) && ++idx);
3003 
3004 		/* Put the centroid on the ARB free list. */
3005 		if (is32bit)
3006 			ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
3007 		else
3008 			ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
3009 	}
3010 
3011 	/*
3012 	 * The free list now contains the randomised indices of every centroid.
3013 	 * Walk the free list from start to end, re-inserting each centroid's
3014 	 * mu/cnt. The tdgst_add() call may or may not consume the free centroid
3015 	 * we re-insert values from during each loop iteration, so we must latch
3016 	 * the index of the next free list centroid before the re-insertion
3017 	 * call. The previous loop above should have left the centroid pointer
3018 	 * pointing to the element at the head of the free list.
3019 	 */
3020 	KASSERT((is32bit ?
3021 	    ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
3022 	    ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
3023 	    ("%s: t-digest ARB@%p free list bug", __func__,
3024 	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
3025 	remctds = maxctds;
3026 	while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
3027 		tmperr = 0;
3028 		if (is32bit) {
3029 			s64q_t x;
3030 
3031 			idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
3032 			/* Cloning a s32q_t into a s64q_t should never fail. */
3033 			/* Cloning an s32q_t into an s64q_t should never fail. */
3034 			tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
3035 			    vs_dtype, tdgst, x, ctd32->cnt, attempt);
3036 			ctd32 = ARB_NODE(ctd32tree, idx);
3037 			KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
3038 			    ("%s: t-digest ARB@%p free list bug", __func__,
3039 			    ctd32tree));
3040 		} else {
3041 			idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
3042 			tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
3043 			    ctd64->mu, ctd64->cnt, attempt);
3044 			ctd64 = ARB_NODE(ctd64tree, idx);
3045 			KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
3046 			    ("%s: t-digest ARB@%p free list bug", __func__,
3047 			    ctd64tree));
3048 		}
3049 		/*
3050 		 * This process should not produce errors, bugs notwithstanding.
3051 		 * Just in case, latch any errors and attempt all re-insertions.
3052 		 */
3053 		error = tmperr ? tmperr : error;
3054 		remctds--;
3055 	}
3056 
3057 	KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
3058 	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
3059 
3060 	return (error);
3061 }
3062 
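/*
 * Insert a sample "x" with the given weight into the t-digest: merge it into
 * the closest existing centroid whose weight can grow within the digest's
 * size bound, otherwise consume a free centroid; if neither is possible,
 * compress the digest and retry with an incremented "attempt".
 */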
3063 static inline int
3064 stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
3065     s64q_t x, uint64_t weight, int attempt)
3066 {
3067 #ifdef DIAGNOSTIC
3068 	char qstr[Q_MAXSTRLEN(x, 10)];
3069 #endif
3070 	struct ctdth32 *ctd32tree;
3071 	struct ctdth64 *ctd64tree;
3072 	void *closest, *cur, *lb, *ub;
3073 	struct voistatdata_tdgstctd32 *ctd32;
3074 	struct voistatdata_tdgstctd64 *ctd64;
3075 	uint64_t cnt, smplcnt, sum, tmpsum;
3076 	s64q_t k, minz, q, z;
3077 	int error, is32bit, n;
3078 
3079 	error = 0;
3080 	minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
3081 
3082 	switch (vs_dtype) {
3083 	case VSD_DTYPE_TDGSTCLUST32:
3084 		if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
3085 			error = EOVERFLOW;
3086 		smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
3087 		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
3088 		is32bit = 1;
3089 		ctd64tree = NULL;
3090 		ctd64 = NULL;
3091 		break;
3092 	case VSD_DTYPE_TDGSTCLUST64:
3093 		if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
3094 			error = EOVERFLOW;
3095 		smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
3096 		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
3097 		is32bit = 0;
3098 		ctd32tree = NULL;
3099 		ctd32 = NULL;
3100 		break;
3101 	default:
3102 		error = EINVAL;
3103 		break;
3104 	}
3105 
3106 	if (error)
3107 		return (error);
3108 
3109 	/*
3110 	 * Inspired by Ted Dunning's AVLTreeDigest.java
3111 	 */
3112 	do {
3113 #if defined(DIAGNOSTIC)
3114 		KASSERT(attempt < 5,
3115 		    ("%s: Too many attempts", __func__));
3116 #endif
3117 		if (attempt >= 5)
3118 			return (EAGAIN);
3119 
3120 		Q_SIFVAL(minz, Q_IFMAXVAL(minz));
3121 		closest = ub = NULL;
3122 		sum = tmpsum = 0;
3123 
3124 		if (is32bit)
3125 			lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
3126 		else
3127 			lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
3128 
3129 		if (lb == NULL) /* Empty tree. */
3130 			lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
3131 			    (void *)ARB_ROOT(ctd64tree));
3132 
3133 		/*
3134 		 * Find the set of centroids with minimum distance to x and
3135 		 * compute the sum of counts for all centroids with mean less
3136 		 * than the first centroid in the set.
3137 		 */
3138 		for (; cur != NULL;
3139 		    cur = (is32bit ?
3140 		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3141 		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3142 			if (is32bit) {
3143 				cnt = ctd32->cnt;
3144 				KASSERT(Q_PRECEQ(ctd32->mu, x),
3145 				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3146 				    Q_RELPREC(ctd32->mu, x)));
3147 				/* Ok to assign as both have same precision. */
3148 				z = ctd32->mu;
3149 			} else {
3150 				cnt = ctd64->cnt;
3151 				KASSERT(Q_PRECEQ(ctd64->mu, x),
3152 				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3153 				    Q_RELPREC(ctd64->mu, x)));
3154 				/* Ok to assign as both have same precision. */
3155 				z = ctd64->mu;
3156 			}
3157 
3158 			error = Q_QSUBQ(&z, x);
3159 #if defined(DIAGNOSTIC)
3160 			KASSERT(!error, ("%s: unexpected error %d", __func__,
3161 			    error));
3162 #endif
3163 			if (error)
3164 				return (error);
3165 
3166 			z = Q_QABS(z);
3167 			if (Q_QLTQ(z, minz)) {
3168 				minz = z;
3169 				lb = cur;
3170 				sum = tmpsum;
3171 				tmpsum += cnt;
3172 			} else if (Q_QGTQ(z, minz)) {
3173 				ub = cur;
3174 				break;
3175 			}
3176 		}
3177 
3178 		cur = (is32bit ?
3179 		    (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
3180 		    (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
3181 
3182 		for (n = 0; cur != ub; cur = (is32bit ?
3183 		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3184 		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3185 			if (is32bit)
3186 				cnt = ctd32->cnt;
3187 			else
3188 				cnt = ctd64->cnt;
3189 
3190 			q = Q_CTRLINI(16);
3191 			if (smplcnt == 1)
3192 				error = Q_QFRACI(&q, 1, 2);
3193 			else
3194 				/* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
3195 				error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
3196 				    (smplcnt - 1) << 1);
3197 			k = q;
3198 			/* k = q x 4 x smplcnt x attempt */
3199 			error |= Q_QMULI(&k, 4 * smplcnt * attempt);
3200 			/* k = k x (1 - q) */
3201 			error |= Q_QSUBI(&q, 1);
3202 			q = Q_QABS(q);
3203 			error |= Q_QMULQ(&k, q);
3204 #if defined(DIAGNOSTIC)
3205 #if !defined(_KERNEL)
3206 			double q_dbl, k_dbl, q2d, k2d;
3207 			q2d = Q_Q2D(q);
3208 			k2d = Q_Q2D(k);
3209 			q_dbl = smplcnt == 1 ? 0.5 :
3210 			    (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
3211 			k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
3212 			/*
3213 			 * If the difference between q and q_dbl is greater than
3214 			 * the fractional precision of q, something is off.
3215 			 * NB: q is holding the value of 1 - q
3216 			 */
3217 			q_dbl = 1.0 - q_dbl;
3218 			KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
3219 			    (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
3220 			    ("Q-type q bad precision"));
3221 			KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
3222 			    1.0 + (0.01 * smplcnt),
3223 			    ("Q-type k bad precision"));
3224 #endif /* !_KERNEL */
3225 			KASSERT(!error, ("%s: unexpected error %d", __func__,
3226 			    error));
3227 #endif /* DIAGNOSTIC */
3228 			if (error)
3229 				return (error);
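			/*
			 * A centroid is eligible to absorb the new sample only
			 * if its weight stays within the size bound
			 * k = 4 * smplcnt * attempt * q * (1 - q); among the
			 * eligible centroids, one is kept (approximately)
			 * uniformly at random via reservoir sampling, i.e. the
			 * random() < INT32_MAX / n test below.
			 */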
3230 			if ((is32bit && ((ctd32->cnt + weight) <=
3231 			    (uint64_t)Q_GIVAL(k))) ||
3232 			    (!is32bit && ((ctd64->cnt + weight) <=
3233 			    (uint64_t)Q_GIVAL(k)))) {
3234 				n++;
3235 				/* random() produces 31 bits. */
3236 				if (random() < (INT32_MAX / n))
3237 					closest = cur;
3238 			}
3239 			sum += cnt;
3240 		}
3241 	} while (closest == NULL &&
3242 	    (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
3243 	    (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
3244 	    attempt++)) == 0);
3245 
3246 	if (error)
3247 		return (error);
3248 
3249 	if (closest != NULL) {
3250 		/* Merge with an existing centroid. */
3251 		if (is32bit) {
3252 			ctd32 = (struct voistatdata_tdgstctd32 *)closest;
3253 			error = Q_QSUBQ(&x, ctd32->mu);
3254 			error = error ? error :
3255 			    Q_QDIVI(&x, ctd32->cnt + weight);
3256 			if (error || (error = Q_QADDQ(&ctd32->mu, x))) {
3257 #ifdef DIAGNOSTIC
3258 				KASSERT(!error, ("%s: unexpected error %d",
3259 				    __func__, error));
3260 #endif
3261 				return (error);
3262 			}
3263 			ctd32->cnt += weight;
3264 			error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
3265 			    NULL ? 0 : EALREADY;
3266 #ifdef DIAGNOSTIC
3267 			RB_REINSERT(rbctdth32,
3268 			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3269 #endif
3270 		} else {
3271 			ctd64 = (struct voistatdata_tdgstctd64 *)closest;
3272 			error = Q_QSUBQ(&x, ctd64->mu);
3273 			error = error ? error :
3274 			    Q_QDIVI(&x, ctd64->cnt + weight);
3275 			if (error || (error = Q_QADDQ(&ctd64->mu, x))) {
3276 				KASSERT(!error, ("%s: unexpected error %d",
3277 				    __func__, error));
3278 				return (error);
3279 			}
3280 			ctd64->cnt += weight;
3281 			error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
3282 			    NULL ? 0 : EALREADY;
3283 #ifdef DIAGNOSTIC
3284 			RB_REINSERT(rbctdth64,
3285 			    &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
3286 #endif
3287 		}
3288 	} else {
3289 		/*
3290 		 * Add a new centroid. If digest compression is working
3291 		 * correctly, there should always be at least one free.
3292 		 */
3293 		if (is32bit) {
3294 			ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
3295 #ifdef DIAGNOSTIC
3296 			KASSERT(ctd32 != NULL,
3297 			    ("%s: t-digest@%p has no free centroids",
3298 			    __func__, tdgst));
3299 #endif
3300 			if (ctd32 == NULL)
3301 				return (EAGAIN);
3302 			if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
3303 				return (error);
3304 			ctd32->cnt = weight;
3305 			error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
3306 			    0 : EALREADY;
3307 #ifdef DIAGNOSTIC
3308 			RB_INSERT(rbctdth32,
3309 			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3310 #endif
3311 		} else {
3312 			ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
3313 #ifdef DIAGNOSTIC
3314 			KASSERT(ctd64 != NULL,
3315 			    ("%s: t-digest@%p has no free centroids",
3316 			    __func__, tdgst));
3317 #endif
3318 			if (ctd64 == NULL) /* Should not happen. */
3319 				return (EAGAIN);
3320 			/* Direct assignment ok as both have same type/prec. */
3321 			ctd64->mu = x;
3322 			ctd64->cnt = weight;
3323 			error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
3324 			    0 : EALREADY;
3325 #ifdef DIAGNOSTIC
3326 			RB_INSERT(rbctdth64, &VSD(tdgstclust64,
3327 			    tdgst)->rbctdtree, ctd64);
3328 #endif
3329 		}
3330 	}
3331 
3332 	if (is32bit)
3333 		VSD(tdgstclust32, tdgst)->smplcnt += weight;
3334 	else {
3335 		VSD(tdgstclust64, tdgst)->smplcnt += weight;
3336 
3337 #ifdef DIAGNOSTIC
3338 		struct rbctdth64 *rbctdtree =
3339 		    &VSD(tdgstclust64, tdgst)->rbctdtree;
3340 		struct voistatdata_tdgstctd64 *rbctd64;
3341 		int i = 0;
3342 		ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
3343 			rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
3344 			    RB_NEXT(rbctdth64, rbctdtree, rbctd64));
3345 
3346 			if (i >= ARB_CURNODES(ctd64tree)
3347 			    || ctd64 != rbctd64
3348 			    || ARB_MIN(ctdth64, ctd64tree) !=
3349 			       RB_MIN(rbctdth64, rbctdtree)
3350 			    || ARB_MAX(ctdth64, ctd64tree) !=
3351 			       RB_MAX(rbctdth64, rbctdtree)
3352 			    || ARB_LEFTIDX(ctd64, ctdlnk) !=
3353 			       ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
3354 			    || ARB_RIGHTIDX(ctd64, ctdlnk) !=
3355 			       ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
3356 			    || ARB_PARENTIDX(ctd64, ctdlnk) !=
3357 			       ARB_SELFIDX(ctd64tree,
3358 			       RB_PARENT(rbctd64, rblnk))) {
3359 				Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
3360 				printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3361 				    "mu=%s\n",
3362 				    (int)ARB_SELFIDX(ctd64tree, ctd64),
3363 				    ARB_PARENTIDX(ctd64, ctdlnk),
3364 				    ARB_LEFTIDX(ctd64, ctdlnk),
3365 				    ARB_RIGHTIDX(ctd64, ctdlnk),
3366 				    ARB_COLOR(ctd64, ctdlnk),
3367 				    qstr);
3368 
3369 				Q_TOSTR(rbctd64->mu, -1, 10, qstr,
3370 				    sizeof(qstr));
3371 				printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3372 				    "mu=%s\n",
3373 				    (int)ARB_SELFIDX(ctd64tree, rbctd64),
3374 				    (int)ARB_SELFIDX(ctd64tree,
3375 				      RB_PARENT(rbctd64, rblnk)),
3376 				    (int)ARB_SELFIDX(ctd64tree,
3377 				      RB_LEFT(rbctd64, rblnk)),
3378 				    (int)ARB_SELFIDX(ctd64tree,
3379 				      RB_RIGHT(rbctd64, rblnk)),
3380 				    RB_COLOR(rbctd64, rblnk),
3381 				    qstr);
3382 
3383 				panic("RB@%p and ARB@%p trees differ\n",
3384 				    rbctdtree, ctd64tree);
3385 			}
3386 			i++;
3387 		}
3388 #endif /* DIAGNOSTIC */
3389 	}
3390 
3391 	return (error);
3392 }
3393 
3394 static inline int
3395 stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
3396     struct voistat *vs, struct voistatdata_tdgst *tdgst)
3397 {
3398 	s64q_t x;
3399 	int error;
3400 
3401 	error = 0;
3402 
3403 	switch (vs->dtype) {
3404 	case VSD_DTYPE_TDGSTCLUST32:
3405 		/* Use same precision as the user's centroids. */
3406 		Q_INI(&x, 0, 0, Q_NFBITS(
3407 		    ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
3408 		break;
3409 	case VSD_DTYPE_TDGSTCLUST64:
3410 		/* Use same precision as the user's centroids. */
3411 		Q_INI(&x, 0, 0, Q_NFBITS(
3412 		    ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
3413 		break;
3414 	default:
3415 		KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
3416 		    vs->dtype == VSD_DTYPE_TDGSTCLUST64,
3417 		    ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
3418 		    __func__, vs->dtype));
3419 		return (EINVAL);
3420 	}
3421 
3422 	/*
3423 	 * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
3424 	 * returning EOVERFLOW if the voival would have fit in a u64q_t.
3425 	 */
3426 	switch (voi_dtype) {
3427 	case VSD_DTYPE_INT_S32:
3428 		error = Q_QCPYVALI(&x, voival->int32.s32);
3429 		break;
3430 	case VSD_DTYPE_INT_U32:
3431 		error = Q_QCPYVALI(&x, voival->int32.u32);
3432 		break;
3433 	case VSD_DTYPE_INT_S64:
3434 		error = Q_QCPYVALI(&x, voival->int64.s64);
3435 		break;
3436 	case VSD_DTYPE_INT_U64:
3437 		error = Q_QCPYVALI(&x, voival->int64.u64);
3438 		break;
3439 	case VSD_DTYPE_INT_SLONG:
3440 		error = Q_QCPYVALI(&x, voival->intlong.slong);
3441 		break;
3442 	case VSD_DTYPE_INT_ULONG:
3443 		error = Q_QCPYVALI(&x, voival->intlong.ulong);
3444 		break;
3445 	case VSD_DTYPE_Q_S32:
3446 		error = Q_QCPYVALQ(&x, voival->q32.sq32);
3447 		break;
3448 	case VSD_DTYPE_Q_U32:
3449 		error = Q_QCPYVALQ(&x, voival->q32.uq32);
3450 		break;
3451 	case VSD_DTYPE_Q_S64:
3452 		error = Q_QCPYVALQ(&x, voival->q64.sq64);
3453 		break;
3454 	case VSD_DTYPE_Q_U64:
3455 		error = Q_QCPYVALQ(&x, voival->q64.uq64);
3456 		break;
3457 	default:
3458 		error = EINVAL;
3459 		break;
3460 	}
3461 
3462 	if (error ||
3463 	    (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
3464 		return (error);
3465 
3466 	vs->flags |= VS_VSDVALID;
3467 	return (0);
3468 }
3469 
3470 int
3471 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3472     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3473 {
3474 	struct voi *v;
3475 	struct voistat *vs;
3476 	void *statevsd, *vsd;
3477 	int error, i, tmperr;
3478 
3479 	error = 0;
3480 
3481 	if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
3482 	    voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3483 		return (EINVAL);
3484 	v = &sb->vois[voi_id];
3485 	if (voi_dtype != v->dtype || v->id < 0 ||
3486 	    ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3487 		return (EINVAL);
3488 
3489 	vs = BLOB_OFFSET(sb, v->stats_off);
3490 	if (v->flags & VOI_REQSTATE)
3491 		statevsd = BLOB_OFFSET(sb, vs->data_off);
3492 	else
3493 		statevsd = NULL;
3494 
3495 	if (flags & SB_VOI_RELUPDATE) {
3496 		switch (voi_dtype) {
3497 		case VSD_DTYPE_INT_S32:
3498 			voival->int32.s32 +=
3499 			    VSD(voistate, statevsd)->prev.int32.s32;
3500 			break;
3501 		case VSD_DTYPE_INT_U32:
3502 			voival->int32.u32 +=
3503 			    VSD(voistate, statevsd)->prev.int32.u32;
3504 			break;
3505 		case VSD_DTYPE_INT_S64:
3506 			voival->int64.s64 +=
3507 			    VSD(voistate, statevsd)->prev.int64.s64;
3508 			break;
3509 		case VSD_DTYPE_INT_U64:
3510 			voival->int64.u64 +=
3511 			    VSD(voistate, statevsd)->prev.int64.u64;
3512 			break;
3513 		case VSD_DTYPE_INT_SLONG:
3514 			voival->intlong.slong +=
3515 			    VSD(voistate, statevsd)->prev.intlong.slong;
3516 			break;
3517 		case VSD_DTYPE_INT_ULONG:
3518 			voival->intlong.ulong +=
3519 			    VSD(voistate, statevsd)->prev.intlong.ulong;
3520 			break;
3521 		case VSD_DTYPE_Q_S32:
3522 			error = Q_QADDQ(&voival->q32.sq32,
3523 			    VSD(voistate, statevsd)->prev.q32.sq32);
3524 			break;
3525 		case VSD_DTYPE_Q_U32:
3526 			error = Q_QADDQ(&voival->q32.uq32,
3527 			    VSD(voistate, statevsd)->prev.q32.uq32);
3528 			break;
3529 		case VSD_DTYPE_Q_S64:
3530 			error = Q_QADDQ(&voival->q64.sq64,
3531 			    VSD(voistate, statevsd)->prev.q64.sq64);
3532 			break;
3533 		case VSD_DTYPE_Q_U64:
3534 			error = Q_QADDQ(&voival->q64.uq64,
3535 			    VSD(voistate, statevsd)->prev.q64.uq64);
3536 			break;
3537 		default:
3538 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3539 			break;
3540 		}
3541 	}
3542 
3543 	if (error)
3544 		return (error);
3545 
3546 	for (i = v->voistatmaxid; i > 0; i--) {
3547 		vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3548 		if (vs->stype < 0)
3549 			continue;
3550 
3551 		vsd = BLOB_OFFSET(sb, vs->data_off);
3552 
3553 		switch (vs->stype) {
3554 		case VS_STYPE_MAX:
3555 			tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3556 			    vs, vsd);
3557 			break;
3558 		case VS_STYPE_MIN:
3559 			tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3560 			    vs, vsd);
3561 			break;
3562 		case VS_STYPE_SUM:
3563 			tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3564 			    vs, vsd);
3565 			break;
3566 		case VS_STYPE_HIST:
3567 			tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3568 			    vs, vsd);
3569 			break;
3570 		case VS_STYPE_TDGST:
3571 			tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3572 			    vs, vsd);
3573 			break;
3574 		default:
3575 			KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
3576 			break;
3577 		}
3578 
3579 		if (tmperr) {
3580 			error = tmperr;
3581 			VS_INCERRS(vs);
3582 		}
3583 	}
3584 
3585 	if (statevsd) {
3586 		switch (voi_dtype) {
3587 		case VSD_DTYPE_INT_S32:
3588 			VSD(voistate, statevsd)->prev.int32.s32 =
3589 			    voival->int32.s32;
3590 			break;
3591 		case VSD_DTYPE_INT_U32:
3592 			VSD(voistate, statevsd)->prev.int32.u32 =
3593 			    voival->int32.u32;
3594 			break;
3595 		case VSD_DTYPE_INT_S64:
3596 			VSD(voistate, statevsd)->prev.int64.s64 =
3597 			    voival->int64.s64;
3598 			break;
3599 		case VSD_DTYPE_INT_U64:
3600 			VSD(voistate, statevsd)->prev.int64.u64 =
3601 			    voival->int64.u64;
3602 			break;
3603 		case VSD_DTYPE_INT_SLONG:
3604 			VSD(voistate, statevsd)->prev.intlong.slong =
3605 			    voival->intlong.slong;
3606 			break;
3607 		case VSD_DTYPE_INT_ULONG:
3608 			VSD(voistate, statevsd)->prev.intlong.ulong =
3609 			    voival->intlong.ulong;
3610 			break;
3611 		case VSD_DTYPE_Q_S32:
3612 			error = Q_QCPYVALQ(
3613 			    &VSD(voistate, statevsd)->prev.q32.sq32,
3614 			    voival->q32.sq32);
3615 			break;
3616 		case VSD_DTYPE_Q_U32:
3617 			error = Q_QCPYVALQ(
3618 			    &VSD(voistate, statevsd)->prev.q32.uq32,
3619 			    voival->q32.uq32);
3620 			break;
3621 		case VSD_DTYPE_Q_S64:
3622 			error = Q_QCPYVALQ(
3623 			    &VSD(voistate, statevsd)->prev.q64.sq64,
3624 			    voival->q64.sq64);
3625 			break;
3626 		case VSD_DTYPE_Q_U64:
3627 			error = Q_QCPYVALQ(
3628 			    &VSD(voistate, statevsd)->prev.q64.uq64,
3629 			    voival->q64.uq64);
3630 			break;
3631 		default:
3632 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3633 			break;
3634 		}
3635 	}
3636 
3637 	return (error);
3638 }
3639 
3640 #ifdef _KERNEL
3641 
3642 static void
3643 stats_init(void *arg)
3644 {
3645 
3646 }
3647 SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3648 
3649 /*
3650  * Sysctl handler to display the list of available stats templates.
3651  */
3652 static int
3653 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3654 {
3655 	struct sbuf *s;
3656 	int err, i;
3657 
3658 	err = 0;
3659 
3660 	/* We can tolerate ntpl being stale, so do not take the lock. */
3661 	s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3662 	    ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3663 	if (s == NULL)
3664 		return (ENOMEM);
3665 
3666 	TPL_LIST_RLOCK();
3667 	for (i = 0; i < ntpl; i++) {
3668 		err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3669 		    tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3670 		if (err) {
3671 			/* Sbuf overflow condition. */
3672 			err = EOVERFLOW;
3673 			break;
3674 		}
3675 	}
3676 	TPL_LIST_RUNLOCK();
3677 
3678 	if (!err) {
3679 		sbuf_finish(s);
3680 		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3681 	}
3682 
3683 	sbuf_delete(s);
3684 	return (err);
3685 }
3686 
3687 /*
3688  * Called by subsystem-specific sysctls to report and/or parse the list of
3689  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3690  * conformant function pointer must be passed in as arg1, which is used to
3691  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3692  * a zero-initialised, heap-allocated block of arg2 bytes of contextual memory is
3693  * passed to all subsystem callbacks made during the operation of
3694  * stats_tpl_sample_rates(); see the illustrative registration sketch below.
3695  *
3696  * XXXLAS: Assumes templates are never removed, which is currently true but may
3697  * need to be reworked in future if dynamic template management becomes a
3698  * requirement e.g. to support kernel module based templates.
3699  */
3700 int
3701 stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
3702 {
3703 	char kvpair_fmt[16], tplspec_fmt[16];
3704 	char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
3705 	char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
3706 	stats_tpl_sr_cb_t subsys_cb;
3707 	void *subsys_ctx;
3708 	char *buf, *new_rates_usr_str, *tpl_name_p;
3709 	struct stats_tpl_sample_rate *rates;
3710 	struct sbuf *s, _s;
3711 	uint32_t cum_pct, pct, tpl_hash;
3712 	int err, i, off, len, newlen, nrates;
3713 
3714 	buf = NULL;
3715 	rates = NULL;
3716 	err = nrates = 0;
3717 	subsys_cb = (stats_tpl_sr_cb_t)arg1;
3718 	KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
3719 	if (arg2 > 0)
3720 		subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
3721 	else
3722 		subsys_ctx = NULL;
3723 
3724 	/* Grab current count of subsystem rates. */
3725 	err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
3726 	if (err)
3727 		goto done;
3728 
3729 	/* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
3730 	len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
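	/* Worst case per rendered rate is "=100,", i.e. the 5 extra chars above. */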
3731 
3732 	if (req->oldptr != NULL || req->newptr != NULL)
3733 		buf = malloc(len, M_TEMP, M_WAITOK);
3734 
3735 	if (req->oldptr != NULL) {
3736 		if (nrates == 0) {
3737 			/* No rates, so return an empty string via oldptr. */
3738 			err = SYSCTL_OUT(req, "", 1);
3739 			if (err)
3740 				goto done;
3741 			goto process_new;
3742 		}
3743 
3744 		s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3745 
3746 		/* Grab locked count of, and ptr to, subsystem rates. */
3747 		err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
3748 		    subsys_ctx);
3749 		if (err)
3750 			goto done;
3751 		TPL_LIST_RLOCK();
3752 		for (i = 0; i < nrates && !err; i++) {
3753 			err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
3754 			    tpllist[rates[i].tpl_slot_id]->mb->tplname,
3755 			    tpllist[rates[i].tpl_slot_id]->mb->tplhash,
3756 			    rates[i].tpl_sample_pct);
3757 		}
3758 		TPL_LIST_RUNLOCK();
3759 		/* Tell subsystem that we're done with its rates list. */
3760 		err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
3761 		if (err)
3762 			goto done;
3763 
3764 		err = sbuf_finish(s);
3765 		if (err)
3766 			goto done; /* We lost a race and buf ended up too small. */
3767 
3768 		/* Return the rendered string data via oldptr. */
3769 		err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
3770 	} else {
3771 		/* Return the upper bound size for buffer sizing requests. */
3772 		err = SYSCTL_OUT(req, NULL, len);
3773 	}
3774 
3775 process_new:
3776 	if (err || req->newptr == NULL)
3777 		goto done;
3778 
3779 	newlen = req->newlen - req->newidx;
3780 	err = SYSCTL_IN(req, buf, newlen);
3781 	if (err)
3782 		goto done;
3783 
3784 	/*
3785 	 * Initialise format strings at run time.
3786 	 *
3787 	 * Write the max template spec string length into the
3788 	 * template_spec=percent key-value pair parsing format string as:
3789 	 *     " %<width>[^=]=%u %n"
3790 	 *
3791 	 * Write the max template name string length into the tplname:tplhash
3792 	 * parsing format string as:
3793 	 *     "%<width>[^:]:%u"
3794 	 *
3795 	 * Subtract 1 for \0 appended by sscanf().
3796 	 */
3797 	sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
3798 	sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
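	/*
	 * Illustrative expansion: assuming, for example, sizeof(tpl_spec) == 64
	 * and sizeof(tpl_name) == 34, the sprintf() calls above yield
	 * kvpair_fmt " %63[^=]=%u %n" and tplspec_fmt "%33[^:]:%u".
	 */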
3799 
3800 	/*
3801 	 * Parse each CSV key-value pair specifying a template and its sample
3802 	 * percentage. Whitespace on either side of a key-value pair is ignored.
3803 	 * Templates can be specified by name, hash, or name and hash per the
3804 	 * following formats (chars in [] are optional):
3805 	 *    ["]<tplname>["]=<percent>
3806 	 *    :<hash>=<percent>
3807 	 *    ["]<tplname>["]:<hash>=<percent>
3808 	 */
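	/*
	 * Illustrative input (template names and hashes are hypothetical):
	 *     "tpl_a"=25,:1234567890=25,"tpl_b":3735928559=50
	 * selects tpl_a by name at 25%, a second template by hash at 25%, and
	 * tpl_b by name and hash at 50%, for a cumulative 100%.
	 */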
3809 	cum_pct = nrates = 0;
3810 	rates = NULL;
3811 	buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
3812 	new_rates_usr_str = buf;
3813 	while (isspace(*new_rates_usr_str))
3814 		new_rates_usr_str++; /* Skip leading whitespace. */
3815 	while (*new_rates_usr_str != '\0') {
3816 		tpl_name_p = tpl_name;
3817 		tpl_name[0] = '\0';
3818 		tpl_hash = 0;
3819 		off = 0;
3820 
3821 		/*
3822 		 * Parse the key-value pair, which must yield 2 conversions, then
3823 		 * parse the template spec to extract either name, hash, or name
3824 		 * and hash depending on the three possible spec formats. The
3825 		 * tplspec_fmt format specifier parses name or name and hash
3826 		 * template specs, while the ":%u" format specifier parses
3827 		 * hash-only template specs. If parsing is successful, ensure
3828 		 * the cumulative sampling percentage does not exceed 100.
3829 		 */
3830 		err = EINVAL;
3831 		if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
3832 		    &off))
3833 			break;
3834 		if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
3835 		    (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
3836 			break;
3837 		if ((cum_pct += pct) > 100)
3838 			break;
3839 		err = 0;
3840 
3841 		/* Strip surrounding "" from template name if present. */
3842 		len = strlen(tpl_name);
3843 		if (len > 0) {
3844 			if (tpl_name[len - 1] == '"')
3845 				tpl_name[--len] = '\0';
3846 			if (tpl_name[0] == '"') {
3847 				tpl_name_p++;
3848 				len--;
3849 			}
3850 		}
3851 
3852 		rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
3853 		    (nrates + 1) * sizeof(*rates), M_WAITOK);
3854 		rates[nrates].tpl_slot_id =
3855 		    stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
3856 		if (rates[nrates].tpl_slot_id < 0) {
3857 			err = -rates[nrates].tpl_slot_id;
3858 			break;
3859 		}
3860 		rates[nrates].tpl_sample_pct = pct;
3861 		nrates++;
3862 		new_rates_usr_str += off;
3863 		if (*new_rates_usr_str != ',')
3864 			break; /* End-of-input or malformed. */
3865 		new_rates_usr_str++; /* Move past comma to next pair. */
3866 	}
3867 
3868 	if (!err) {
3869 		if ((new_rates_usr_str - buf) < newlen) {
3870 			/* Entire input has not been consumed. */
3871 			err = EINVAL;
3872 		} else {
3873 			/*
3874 			 * Give subsystem the new rates. They'll return the
3875 			 * appropriate rates pointer for us to garbage collect.
3876 			 */
3877 			err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
3878 			    subsys_ctx);
3879 		}
3880 	}
3881 	stats_free(rates);
3882 
3883 done:
3884 	free(buf, M_TEMP);
3885 	free(subsys_ctx, M_TEMP);
3886 	return (err);
3887 }
3888 
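/*
 * Illustrative registration sketch; "mysubsys" and its callback below are
 * hypothetical names, not part of this file. A subsystem exposes its sample
 * rates list by registering a string sysctl whose handler is
 * stats_tpl_sample_rates(), passing its stats_tpl_sr_cb_t conformant callback
 * as arg1 and the size of any per-call context it needs as arg2. The callback
 * must service TPL_SR_UNLOCKED_GET (return the current rate count),
 * TPL_SR_RLOCKED_GET/TPL_SR_RUNLOCK (lend and reclaim its locked rates list
 * while it is rendered) and TPL_SR_PUT (install the newly parsed list,
 * returning any list for the caller to free). For example:
 *
 *	SYSCTL_PROC(_kern_mysubsys, OID_AUTO, tpl_sample_rates,
 *	    CTLTYPE_STRING | CTLFLAG_RW, mysubsys_tpl_sr_cb, 0,
 *	    stats_tpl_sample_rates, "A",
 *	    "CSV list of template_spec=percent pairs sampled by mysubsys");
 */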
3889 SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW, NULL,
3890     "stats(9) MIB");
3891 
3892 SYSCTL_PROC(_kern_stats, OID_AUTO, templates, CTLTYPE_STRING|CTLFLAG_RD,
3893     NULL, 0, stats_tpl_list_available, "A",
3894     "list the name/hash of all available stats(9) templates");
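/*
 * Reading the kern.stats.templates sysctl registered above yields the CSV
 * list rendered by stats_tpl_list_available() in the form "<tplname>":<tplhash>,
 * e.g. (with purely illustrative template names and hashes):
 *     "TPL_A":2672320796,"TPL_B":394857612
 */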
3895 
3896 #else /* ! _KERNEL */
3897 
3898 static void __attribute__ ((constructor))
3899 stats_constructor(void)
3900 {
3901 
3902 	pthread_rwlock_init(&tpllistlock, NULL);
3903 }
3904 
3905 static void __attribute__ ((destructor))
3906 stats_destructor(void)
3907 {
3908 
3909 	pthread_rwlock_destroy(&tpllistlock);
3910 }
3911 
3912 #endif /* _KERNEL */
3913