xref: /freebsd/sys/kern/subr_stats.c (revision b3e7694832e81d7a904a10f525f8797b753bf0d3)
1 /*-
2  * Copyright (c) 2014-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * Author: Lawrence Stewart <lstewart@netflix.com>
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/arb.h>
36 #include <sys/ctype.h>
37 #include <sys/errno.h>
38 #include <sys/hash.h>
39 #include <sys/limits.h>
40 #include <sys/malloc.h>
41 #include <sys/qmath.h>
42 #include <sys/sbuf.h>
43 #if defined(DIAGNOSTIC)
44 #include <sys/tree.h>
45 #endif
46 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
47 #include <sys/stddef.h>
48 #include <sys/stdint.h>
49 #include <sys/time.h>
50 
51 #ifdef _KERNEL
52 #include <sys/kernel.h>
53 #include <sys/lock.h>
54 #include <sys/rwlock.h>
55 #include <sys/sysctl.h>
56 #include <sys/systm.h>
57 #else /* ! _KERNEL */
58 #include <pthread.h>
59 #include <stdbool.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <string.h>
63 #endif /* _KERNEL */
64 
/*
 * Per-VOI internal state carried by the special VS_STYPE_VOISTATE voistat.
 * Not a consumer-visible statistic in its own right.
 */
struct voistatdata_voistate {
	/* Previous VOI value for diff calculation. */
	struct voistatdata_numeric prev;
};
69 
#define	VS_VSDVALID	0x0001	/* Stat's voistatdata updated at least once. */
/* Per-statistic bookkeeping stored in the blob's voistat array. */
struct voistat {
	int8_t		stype;		/* Type of stat e.g. VS_STYPE_SUM. */
	enum vsd_dtype	dtype : 8;	/* Data type of this stat's data. */
	uint16_t	data_off;	/* Blob offset for this stat's data. */
	uint16_t	dsz;		/* Size of stat's data. */
#define	VS_EBITS 8
	uint16_t	errs : VS_EBITS;/* Non-wrapping error count. */
	uint16_t	flags : 16 - VS_EBITS;	/* e.g. VS_VSDVALID. */
};
/* The voistat error count is capped to avoid wrapping. */
#define	VS_INCERRS(vs) do {						\
	if ((vs)->errs < (1U << VS_EBITS) - 1)				\
		(vs)->errs++;						\
} while (0)
85 
/*
 * Ideas for flags:
 *   - Global or entity specific (global would imply use of counter(9)?)
 *   - Whether to reset stats on read or not
 *   - Signal an overflow?
 *   - Compressed voistat array
 */
#define	VOI_REQSTATE	0x0001	/* VOI requires VS_STYPE_VOISTATE. */
/*
 * Per-VOI header in the blob's vois[] array.  Must remain exactly 8 bytes;
 * the NVOIS() macro below shifts by 3 on that assumption.
 */
struct voi {
	int16_t		id;		/* VOI id. */
	enum vsd_dtype	dtype : 8;	/* Data type of the VOI itself. */
	int8_t		voistatmaxid;	/* Largest allocated voistat index. */
	uint16_t	stats_off;	/* Blob offset for this VOIs stats. */
	uint16_t	flags;		/* e.g. VOI_REQSTATE. */
};
101 
/*
 * Memory for the entire blob is allocated as a slab and then offsets are
 * maintained to carve up the slab into sections holding different data types.
 *
 * Ideas for flags:
 * - Compressed voi array (trade off memory usage vs search time)
 * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
 */
struct statsblobv1 {
	uint8_t		abi;		/* Blob ABI version. */
	uint8_t		endian;		/* Endianness of blob producer. */
	uint16_t	flags;
	uint16_t	maxsz;		/* Slab capacity in bytes. */
	uint16_t	cursz;		/* Bytes currently in use. */
	/* Fields from here down are opaque to consumers. */
	uint32_t	tplhash;	/* Base template hash ID. */
	uint16_t	stats_off;	/* voistat array blob offset. */
	uint16_t	statsdata_off;	/* voistatdata array blob offset. */
	sbintime_t	created;	/* Blob creation time. */
	sbintime_t	lastrst;	/* Time of last reset. */
	struct voi	vois[];		/* Array indexed by [voi_id]. */
} __aligned(sizeof(void *));
/*
 * The public struct statsblob's opaque region must begin exactly where the
 * v1 opaque fields begin, or consumer-facing accessors would misread blobs.
 */
_Static_assert(offsetof(struct statsblobv1, cursz) +
    SIZEOF_MEMBER(struct statsblobv1, cursz) ==
    offsetof(struct statsblob, opaque),
    "statsblobv1 ABI mismatch");

/* A v1 template: metadata (names etc.) paired with its prototype blob. */
struct statsblobv1_tpl {
	struct metablob		*mb;
	struct statsblobv1	*sb;
};
133 
/* Context passed to iterator callbacks. */
struct sb_iter_ctx {
	void		*usrctx;	/* Caller supplied context. */
	uint32_t	flags;		/* Flags for current iteration. */
	int16_t		vslot;		/* struct voi slot index. */
	int8_t		vsslot;		/* struct voistat slot index. */
};

/* Context for the blob-to-string rendering iterator callback. */
struct sb_tostrcb_ctx {
	struct sbuf		*buf;	/* Output accumulates here. */
	struct statsblob_tpl	*tpl;	/* Template for names/metadata. */
	enum sb_str_fmt	fmt;		/* Requested output format. */
	uint32_t		flags;
};

/* Context for the generic blob-visit iterator callback. */
struct sb_visitcb_ctx {
	stats_blob_visitcb_t	cb;	/* Consumer's visit callback. */
	void			*usrctx;	/* Opaque consumer context. */
};

/* Stats blob iterator callback. */
typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx);
157 
#ifdef _KERNEL
/* Kernel build: template list is guarded by an rwlock(9). */
static struct rwlock tpllistlock;
RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
#define	TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
#define	TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
#define	TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
#define	TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
#define	TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
#define	TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
#define	TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
#define	stats_free(ptr) free((ptr), M_STATS)
#else /* ! _KERNEL */
/* Userland build: emulate the kernel primitives with pthreads/libc. */
static void stats_constructor(void);
static void stats_destructor(void);
static pthread_rwlock_t tpllistlock;
#define	TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
#define	TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
#define	TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
#define	TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
#define	TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
/* pthread rwlocks have no ownership-assert API; asserts are no-ops. */
#define	TPL_LIST_LOCK_ASSERT() do { } while (0)
#define	TPL_LIST_RLOCK_ASSERT() do { } while (0)
#define	TPL_LIST_WLOCK_ASSERT() do { } while (0)
#ifdef NDEBUG
#define	KASSERT(cond, msg) do {} while (0)
#define	stats_abort() do {} while (0)
#else /* ! NDEBUG */
#define	KASSERT(cond, msg) do { \
	if (!(cond)) { \
		panic msg; \
	} \
} while (0)
#define	stats_abort() abort()
#endif /* NDEBUG */
#define	stats_free(ptr) free(ptr)
/* Userland panic: print the message then abort (debug builds only). */
#define	panic(fmt, ...) do { \
	fprintf(stderr, (fmt), ##__VA_ARGS__); \
	stats_abort(); \
} while (0)
#endif /* _KERNEL */
199 
/* Maximum size of a v1 blob; cursz/maxsz are uint16_t. */
#define	SB_V1_MAXSZ 65535

/* Obtain a blob offset pointer. */
#define	BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))

/*
 * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
 * power of 2 size, we can shift instead of divide. The shift amount must be
 * updated if sizeof(struct voi) ever changes, which the assert should catch.
 */
#define	NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
    sizeof(struct statsblobv1)) >> 3))
_Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
213 
214 /* Try restrict names to alphanumeric and underscore to simplify JSON compat. */
/* Try restrict names to alphanumeric and underscore to simplify JSON compat. */
/* Short machine-friendly name per stat type, indexed by VS_STYPE_*. */
const char *vs_stype2name[VS_NUM_STYPES] = {
	[VS_STYPE_VOISTATE] = "VOISTATE",
	[VS_STYPE_SUM] = "SUM",
	[VS_STYPE_MAX] = "MAX",
	[VS_STYPE_MIN] = "MIN",
	[VS_STYPE_HIST] = "HIST",
	[VS_STYPE_TDGST] = "TDGST",
};

/* Human-readable description per stat type, indexed by VS_STYPE_*. */
const char *vs_stype2desc[VS_NUM_STYPES] = {
	[VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
	[VS_STYPE_SUM] = "Simple arithmetic accumulator",
	[VS_STYPE_MAX] = "Maximum observed VOI value",
	[VS_STYPE_MIN] = "Minimum observed VOI value",
	[VS_STYPE_HIST] = "Histogram of observed VOI values",
	[VS_STYPE_TDGST] = "t-digest of observed VOI values",
};
232 
/* Short machine-friendly name per data type, indexed by VSD_DTYPE_*. */
const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = "VOISTATE",
	[VSD_DTYPE_INT_S32] = "INT_S32",
	[VSD_DTYPE_INT_U32] = "INT_U32",
	[VSD_DTYPE_INT_S64] = "INT_S64",
	[VSD_DTYPE_INT_U64] = "INT_U64",
	[VSD_DTYPE_INT_SLONG] = "INT_SLONG",
	[VSD_DTYPE_INT_ULONG] = "INT_ULONG",
	[VSD_DTYPE_Q_S32] = "Q_S32",
	[VSD_DTYPE_Q_U32] = "Q_U32",
	[VSD_DTYPE_Q_S64] = "Q_S64",
	[VSD_DTYPE_Q_U64] = "Q_U64",
	[VSD_DTYPE_CRHIST32] = "CRHIST32",
	[VSD_DTYPE_DRHIST32] = "DRHIST32",
	[VSD_DTYPE_DVHIST32] = "DVHIST32",
	[VSD_DTYPE_CRHIST64] = "CRHIST64",
	[VSD_DTYPE_DRHIST64] = "DRHIST64",
	[VSD_DTYPE_DVHIST64] = "DVHIST64",
	[VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
	[VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
};
254 
/*
 * Base storage size in bytes per data type, indexed by VSD_DTYPE_*.
 * Compound types (hists, t-digests) have additional variable-length tails
 * sized via the HIST_NBKTS2VSDSZ()/TDGST_NCTRS2VSDSZ() macros.
 */
const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
	[VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
	[VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
	[VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
	[VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
	[VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
	[VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
	[VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
	[VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
	[VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
	[VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
	[VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
	[VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
	[VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
	[VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
	[VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
	[VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
	[VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
	[VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
};
276 
/*
 * Whether a data type is compound (struct-like with internal members) rather
 * than a plain scalar/Q number, indexed by VSD_DTYPE_*.
 */
static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = true,
	[VSD_DTYPE_INT_S32] = false,
	[VSD_DTYPE_INT_U32] = false,
	[VSD_DTYPE_INT_S64] = false,
	[VSD_DTYPE_INT_U64] = false,
	[VSD_DTYPE_INT_SLONG] = false,
	[VSD_DTYPE_INT_ULONG] = false,
	[VSD_DTYPE_Q_S32] = false,
	[VSD_DTYPE_Q_U32] = false,
	[VSD_DTYPE_Q_S64] = false,
	[VSD_DTYPE_Q_U64] = false,
	[VSD_DTYPE_CRHIST32] = true,
	[VSD_DTYPE_DRHIST32] = true,
	[VSD_DTYPE_DVHIST32] = true,
	[VSD_DTYPE_CRHIST64] = true,
	[VSD_DTYPE_DRHIST64] = true,
	[VSD_DTYPE_DVHIST64] = true,
	[VSD_DTYPE_TDGSTCLUST32] = true,
	[VSD_DTYPE_TDGSTCLUST64] = true,
};
298 
/*
 * Minimum/maximum representable value per numeric data type, indexed by
 * [LIM_MIN|LIM_MAX][VSD_DTYPE_*].  For Q types only the integral part is
 * initialised here; per-instance control bits must be set separately by
 * consumers (see e.g. stats_vss_hist_hlpr()).
 */
const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
	[LIM_MIN] = {
		[VSD_DTYPE_VOISTATE] = {0},
		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
	},
	[LIM_MAX] = {
		[VSD_DTYPE_VOISTATE] = {0},
		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
	}
};
327 
/* tpllistlock protects tpllist and ntpl */
static uint32_t ntpl;			/* Number of registered templates. */
static struct statsblob_tpl **tpllist;	/* Array of registered templates. */

/* Internal helpers; definitions follow below. */
static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
    int flags);
//static void stats_v1_blob_finalise(struct statsblobv1 *sb);
static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
    uint32_t flags);
static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes);
static void stats_v1_blob_iter(struct statsblobv1 *sb,
    stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
343 
344 static inline int
345 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
346 {
347 
348 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
349 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
350 	    Q_RELPREC(c1->mu, c2->mu)));
351 
352        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
353 }
354 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
355 
356 static inline int
357 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
358 {
359 
360 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
361 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
362 	    Q_RELPREC(c1->mu, c2->mu)));
363 
364        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
365 }
366 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
367 
#ifdef DIAGNOSTIC
/* Shadow RB trees used only for tree-consistency cross checks. */
RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
#endif
372 
/*
 * Return the current uptime as an sbintime_t, using sbinuptime(9) in the
 * kernel and CLOCK_MONOTONIC_FAST via clock_gettime(2) in userland.
 */
static inline sbintime_t
stats_sbinuptime(void)
{
	sbintime_t sbt;
#ifdef _KERNEL

	sbt = sbinuptime();
#else /* ! _KERNEL */
	struct timespec tp;

	clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
	sbt = tstosbt(tp);
#endif /* _KERNEL */

	return (sbt);
}
389 
/*
 * realloc() wrapper usable in both kernel and userland builds.  'oldsz' is
 * needed in userland to emulate the kernel's M_ZERO semantics, since libc
 * realloc() does not zero newly grown space.  Returns the (possibly moved)
 * allocation, or NULL on failure; as with realloc(), the caller retains
 * responsibility for the original pointer on failure.
 */
static inline void *
stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
{

#ifdef _KERNEL
	/* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
	if (!(flags & (M_WAITOK | M_NOWAIT)))
		flags |= M_NOWAIT;
	ptr = realloc(ptr, newsz, M_STATS, flags);
#else /* ! _KERNEL */
	ptr = realloc(ptr, newsz);
	if ((flags & M_ZERO) && ptr != NULL) {
		/* Fresh allocation: zero everything; grow: zero the tail. */
		if (oldsz == 0)
			memset(ptr, '\0', newsz);
		else if (newsz > oldsz)
			memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
	}
#endif /* _KERNEL */

	return (ptr);
}
411 
/*
 * strdup() usable in both builds.  The kernel variant takes malloc(9) flags
 * (defaulting to M_NOWAIT); the userland variant ignores 'flags' and defers
 * to libc strdup().  Returns NULL on allocation failure; caller frees with
 * stats_free().
 */
static inline char *
stats_strdup(const char *s,
#ifdef _KERNEL
    int flags)
{
	char *copy;
	size_t len;

	if (!(flags & (M_WAITOK | M_NOWAIT)))
		flags |= M_NOWAIT;

	len = strlen(s) + 1;
	if ((copy = malloc(len, M_STATS, flags)) != NULL)
		bcopy(s, copy, len);

	return (copy);
#else
    int flags __unused)
{
	return (strdup(s));
#endif
}
434 
/*
 * Recompute a template's identifying hash from its name, its VOI names and
 * the raw bytes of its prototype blob.  Caller must hold the template list
 * write lock as the hash lives in shared template metadata.
 */
static inline void
stats_tpl_update_hash(struct statsblob_tpl *tpl)
{

	TPL_LIST_WLOCK_ASSERT();
	tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
	for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
		/* Unnamed VOIs do not contribute to the hash. */
		if (tpl->mb->voi_meta[voi_id].name != NULL)
			tpl->mb->tplhash = hash32_str(
			    tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
	}
	tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
	    tpl->mb->tplhash);
}
449 
450 static inline uint64_t
451 stats_pow_u64(uint64_t base, uint64_t exp)
452 {
453 	uint64_t result = 1;
454 
455 	while (exp) {
456 		if (exp & 1)
457 			result *= base;
458 		exp >>= 1;
459 		base *= base;
460 	}
461 
462 	return (result);
463 }
464 
465 static inline int
466 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
467     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
468 {
469 	uint64_t step = 0;
470 	int error = 0;
471 
472 	switch (info->scheme) {
473 	case BKT_LIN:
474 		step = info->lin.stepinc;
475 		break;
476 	case BKT_EXP:
477 		step = stats_pow_u64(info->exp.stepbase,
478 		    info->exp.stepexp + curbkt);
479 		break;
480 	case BKT_LINEXP:
481 		{
482 		uint64_t curstepexp = 1;
483 
484 		switch (info->voi_dtype) {
485 		case VSD_DTYPE_INT_S32:
486 			while ((int32_t)stats_pow_u64(info->linexp.stepbase,
487 			    curstepexp) <= bkt_lb->int32.s32)
488 				curstepexp++;
489 			break;
490 		case VSD_DTYPE_INT_U32:
491 			while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
492 			    curstepexp) <= bkt_lb->int32.u32)
493 				curstepexp++;
494 			break;
495 		case VSD_DTYPE_INT_S64:
496 			while ((int64_t)stats_pow_u64(info->linexp.stepbase,
497 			    curstepexp) <= bkt_lb->int64.s64)
498 				curstepexp++;
499 			break;
500 		case VSD_DTYPE_INT_U64:
501 			while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
502 			    curstepexp) <= bkt_lb->int64.u64)
503 				curstepexp++;
504 			break;
505 		case VSD_DTYPE_INT_SLONG:
506 			while ((long)stats_pow_u64(info->linexp.stepbase,
507 			    curstepexp) <= bkt_lb->intlong.slong)
508 				curstepexp++;
509 			break;
510 		case VSD_DTYPE_INT_ULONG:
511 			while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
512 			    curstepexp) <= bkt_lb->intlong.ulong)
513 				curstepexp++;
514 			break;
515 		case VSD_DTYPE_Q_S32:
516 			while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
517 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
518 			break;
519 		case VSD_DTYPE_Q_U32:
520 			while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
521 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
522 			break;
523 		case VSD_DTYPE_Q_S64:
524 			while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
525 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
526 				curstepexp++;
527 			break;
528 		case VSD_DTYPE_Q_U64:
529 			while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
530 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
531 				curstepexp++;
532 			break;
533 		default:
534 			break;
535 		}
536 
537 		step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
538 		    info->linexp.linstepdiv;
539 		if (step == 0)
540 			step = 1;
541 		break;
542 		}
543 	default:
544 		break;
545 	}
546 
547 	if (info->scheme == BKT_USR) {
548 		*bkt_lb = info->usr.bkts[curbkt].lb;
549 		*bkt_ub = info->usr.bkts[curbkt].ub;
550 	} else if (step != 0) {
551 		switch (info->voi_dtype) {
552 		case VSD_DTYPE_INT_S32:
553 			bkt_ub->int32.s32 += (int32_t)step;
554 			break;
555 		case VSD_DTYPE_INT_U32:
556 			bkt_ub->int32.u32 += (uint32_t)step;
557 			break;
558 		case VSD_DTYPE_INT_S64:
559 			bkt_ub->int64.s64 += (int64_t)step;
560 			break;
561 		case VSD_DTYPE_INT_U64:
562 			bkt_ub->int64.u64 += (uint64_t)step;
563 			break;
564 		case VSD_DTYPE_INT_SLONG:
565 			bkt_ub->intlong.slong += (long)step;
566 			break;
567 		case VSD_DTYPE_INT_ULONG:
568 			bkt_ub->intlong.ulong += (unsigned long)step;
569 			break;
570 		case VSD_DTYPE_Q_S32:
571 			error = Q_QADDI(&bkt_ub->q32.sq32, step);
572 			break;
573 		case VSD_DTYPE_Q_U32:
574 			error = Q_QADDI(&bkt_ub->q32.uq32, step);
575 			break;
576 		case VSD_DTYPE_Q_S64:
577 			error = Q_QADDI(&bkt_ub->q64.sq64, step);
578 			break;
579 		case VSD_DTYPE_Q_U64:
580 			error = Q_QADDI(&bkt_ub->q64.uq64, step);
581 			break;
582 		default:
583 			break;
584 		}
585 	} else { /* info->scheme != BKT_USR && step == 0 */
586 		return (EINVAL);
587 	}
588 
589 	return (error);
590 }
591 
/*
 * Determine the number of buckets required for the histogram described by
 * 'info' by walking bucket bounds from info->lb until a bucket's upper bound
 * exceeds info->ub (or, for BKT_USR, until the user array is exhausted).
 * Buckets for the +/- infinity catch-alls are added if requested.  Returns 0
 * on any error from the bucket helper or an unknown data type.
 */
static uint32_t
stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
{
	struct voistatdata_numeric bkt_lb, bkt_ub;
	uint32_t nbkts;
	int done;

	if (info->scheme == BKT_USR) {
		/* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
		/*
		 * NOTE(review): info->ub is set from the last bucket's .lb,
		 * not .ub — looks deliberate given the BKT_USR termination
		 * below counts buckets rather than comparing bounds, but
		 * worth confirming against the macro referenced above.
		 */
		info->lb = info->usr.bkts[0].lb;
		info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
	}

	nbkts = 0;
	done = 0;
	bkt_ub = info->lb;

	do {
		/* Each bucket's lower bound is the previous upper bound. */
		bkt_lb = bkt_ub;
		if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
			return (0);

		if (info->scheme == BKT_USR)
			done = (nbkts == info->usr.nbkts);
		else {
			switch (info->voi_dtype) {
			case VSD_DTYPE_INT_S32:
				done = (bkt_ub.int32.s32 > info->ub.int32.s32);
				break;
			case VSD_DTYPE_INT_U32:
				done = (bkt_ub.int32.u32 > info->ub.int32.u32);
				break;
			case VSD_DTYPE_INT_S64:
				done = (bkt_ub.int64.s64 > info->ub.int64.s64);
				break;
			case VSD_DTYPE_INT_U64:
				done = (bkt_ub.int64.u64 > info->ub.int64.u64);
				break;
			case VSD_DTYPE_INT_SLONG:
				done = (bkt_ub.intlong.slong >
				    info->ub.intlong.slong);
				break;
			case VSD_DTYPE_INT_ULONG:
				done = (bkt_ub.intlong.ulong >
				    info->ub.intlong.ulong);
				break;
			case VSD_DTYPE_Q_S32:
				done = Q_QGTQ(bkt_ub.q32.sq32,
				    info->ub.q32.sq32);
				break;
			case VSD_DTYPE_Q_U32:
				done = Q_QGTQ(bkt_ub.q32.uq32,
				    info->ub.q32.uq32);
				break;
			case VSD_DTYPE_Q_S64:
				done = Q_QGTQ(bkt_ub.q64.sq64,
				    info->ub.q64.sq64);
				break;
			case VSD_DTYPE_Q_U64:
				done = Q_QGTQ(bkt_ub.q64.uq64,
				    info->ub.q64.uq64);
				break;
			default:
				return (0);
			}
		}
	} while (!done);

	/* Account for the out-of-range catch-all buckets, if requested. */
	if (info->flags & VSD_HIST_LBOUND_INF)
		nbkts++;
	if (info->flags & VSD_HIST_UBOUND_INF)
		nbkts++;

	return (nbkts);
}
667 
/*
 * Histogram voistat spec helper: size and allocate the histogram's initial
 * voistat data (vss->iv/vss->vsdsz) and populate every bucket's bounds per
 * 'info'.  Returns 0 on success or EINVAL/ENOMEM on failure.
 *
 * NOTE(review): on EINVAL after the allocation succeeds, vss->iv is left
 * allocated — callers appear expected to release it via
 * stats_vss_hlpr_cleanup(); confirm all callers do so on error paths.
 */
int
stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_hist_hlpr_info *info)
{
	struct voistatdata_hist *hist;
	struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
	    *ubinfbktlb, *ubinfbktub;
	uint32_t bkt, nbkts, nloop;

	/* Infinity catch-all buckets make no sense for discrete-value hists. */
	if (vss == NULL || info == NULL || (info->flags &
	(VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
	VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
		return (EINVAL);

	info->voi_dtype = voi_dtype;

	if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
		return (EINVAL);

	/* Size the variable-length histogram for 'nbkts' buckets. */
	switch (info->hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
		break;
	case VSD_DTYPE_DRHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
		break;
	case VSD_DTYPE_DVHIST32:
		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
		break;
	case VSD_DTYPE_CRHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
		break;
	case VSD_DTYPE_DRHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
		break;
	case VSD_DTYPE_DVHIST64:
		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
	if (vss->iv == NULL)
		return (ENOMEM);

	hist = (struct voistatdata_hist *)vss->iv;
	bkt_ub = info->lb;

	/*
	 * Walk bucket bounds again (nloop indexes the computed buckets;
	 * bkt indexes the stored array, skipping slot 0 if it is reserved
	 * for the lower-bound infinity catch-all).
	 */
	for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
	    bkt < nbkts;
	    bkt++, nloop++) {
		bkt_lb = bkt_ub;
		if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
			return (EINVAL);

		switch (info->hist_dtype) {
		case VSD_DTYPE_CRHIST32:
			VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
			break;
		case VSD_DTYPE_DRHIST32:
			VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
			VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
			break;
		case VSD_DTYPE_DVHIST32:
			VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
			break;
		case VSD_DTYPE_CRHIST64:
			VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
			break;
		case VSD_DTYPE_DRHIST64:
			VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
			VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
			break;
		case VSD_DTYPE_DVHIST64:
			VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
			break;
		default:
			return (EINVAL);
		}
	}

	/* Locate the bound fields of the infinity catch-all buckets. */
	lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;

	switch (info->hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
		ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
		break;
	case VSD_DTYPE_DRHIST32:
		lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
		lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
		ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
		ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
		break;
	case VSD_DTYPE_CRHIST64:
		lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
		ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
		break;
	case VSD_DTYPE_DRHIST64:
		lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
		lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
		ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
		ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
		break;
	case VSD_DTYPE_DVHIST32:
	case VSD_DTYPE_DVHIST64:
		/* Discrete-value hists have no catch-all buckets. */
		break;
	default:
		return (EINVAL);
	}

	if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
		*lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
		/*
		 * Assignment from numeric_limit array for Q types assigns max
		 * possible integral/fractional value for underlying data type,
		 * but we must set control bits for this specific histogram per
		 * the user's choice of fractional bits, which we extract from
		 * info->lb.
		 */
		if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
		    info->voi_dtype == VSD_DTYPE_Q_U32) {
			/* Signedness doesn't matter for setting control bits. */
			Q_SCVAL(lbinfbktlb->q32.sq32,
			    Q_GCVAL(info->lb.q32.sq32));
		} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
		    info->voi_dtype == VSD_DTYPE_Q_U64) {
			/* Signedness doesn't matter for setting control bits. */
			Q_SCVAL(lbinfbktlb->q64.sq64,
			    Q_GCVAL(info->lb.q64.sq64));
		}
		if (lbinfbktub)
			*lbinfbktub = info->lb;
	}
	if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
		/* Upper catch-all spans [last computed bound, type max]. */
		*ubinfbktlb = bkt_lb;
		if (ubinfbktub) {
			*ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
			if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
			    info->voi_dtype == VSD_DTYPE_Q_U32) {
				Q_SCVAL(ubinfbktub->q32.sq32,
				    Q_GCVAL(info->lb.q32.sq32));
			} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
			    info->voi_dtype == VSD_DTYPE_Q_U64) {
				Q_SCVAL(ubinfbktub->q64.sq64,
				    Q_GCVAL(info->lb.q64.sq64));
			}
		}
	}

	return (0);
}
821 
/*
 * t-digest voistat spec helper: size and allocate the digest's initial
 * voistat data for info->nctds centroids, and initialise every centroid's
 * mean to a zero Q number at the requested precision.  Returns 0 on success
 * or EINVAL/ENOMEM on failure.
 */
int
stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_tdgst_hlpr_info *info)
{
	struct voistatdata_tdgst *tdgst;
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;

	info->voi_dtype = voi_dtype;

	switch (info->tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
	if (vss->iv == NULL)
		return (ENOMEM);

	tdgst = (struct voistatdata_tdgst *)vss->iv;

	/* Seed each centroid's mean with a zero at the chosen precision. */
	switch (info->tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
			Q_INI(&ctd32->mu, 0, 0, info->prec);
		}
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
			Q_INI(&ctd64->mu, 0, 0, info->prec);
		}
		break;
	default:
		return (EINVAL);
	}

	return (0);
}
870 
/*
 * Scalar stat (SUM/MIN/MAX) voistat spec helper: allocate the stat's initial
 * value and seed it with the appropriate identity — 0 for SUM, the type max
 * for MIN and the type min for MAX, so the first update always replaces it.
 * Q types additionally receive control bits for info->prec fractional bits.
 * Returns 0 on success or EINVAL/ENOMEM on failure.
 */
int
stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
    struct vss_numeric_hlpr_info *info)
{
	struct voistatdata_numeric iv;

	switch (vss->stype) {
	case VS_STYPE_SUM:
		iv = stats_ctor_vsd_numeric(0);
		break;
	case VS_STYPE_MIN:
		iv = numeric_limits[LIM_MAX][voi_dtype];
		break;
	case VS_STYPE_MAX:
		iv = numeric_limits[LIM_MIN][voi_dtype];
		break;
	default:
		return (EINVAL);
	}

	vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
	if (vss->iv == NULL)
		return (ENOMEM);

	vss->vs_dtype = voi_dtype;
	vss->vsdsz = vsd_dtype2size[voi_dtype];
	/* Store the seed value through the correctly-typed lens. */
	switch (voi_dtype) {
	case VSD_DTYPE_INT_S32:
		*((int32_t *)vss->iv) = iv.int32.s32;
		break;
	case VSD_DTYPE_INT_U32:
		*((uint32_t *)vss->iv) = iv.int32.u32;
		break;
	case VSD_DTYPE_INT_S64:
		*((int64_t *)vss->iv) = iv.int64.s64;
		break;
	case VSD_DTYPE_INT_U64:
		*((uint64_t *)vss->iv) = iv.int64.u64;
		break;
	case VSD_DTYPE_INT_SLONG:
		*((long *)vss->iv) = iv.intlong.slong;
		break;
	case VSD_DTYPE_INT_ULONG:
		*((unsigned long *)vss->iv) = iv.intlong.ulong;
		break;
	case VSD_DTYPE_Q_S32:
		*((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_U32:
		*((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_S64:
		*((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
		    Q_CTRLINI(info->prec));
		break;
	case VSD_DTYPE_Q_U64:
		*((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
		    Q_CTRLINI(info->prec));
		break;
	default:
		break;
	}

	return (0);
}
938 
939 int
940 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
941     struct voistatspec *vss)
942 {
943 	int i, ret;
944 
945 	for (i = nvss - 1; i >= 0; i--) {
946 		if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
947 		    vss[i].hlprinfo)) != 0)
948 			return (ret);
949 	}
950 
951 	return (0);
952 }
953 
954 void
955 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
956 {
957 	int i;
958 
959 	for (i = nvss - 1; i >= 0; i--) {
960 		if (vss[i].hlpr) {
961 			stats_free((void *)vss[i].iv);
962 			vss[i].iv = NULL;
963 		}
964 	}
965 }
966 
967 int
968 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
969 {
970 	int error;
971 
972 	error = 0;
973 
974 	TPL_LIST_WLOCK();
975 	if (tpl_id < 0 || tpl_id >= (int)ntpl) {
976 		error = ENOENT;
977 	} else {
978 		*tpl = tpllist[tpl_id];
979 		/* XXXLAS: Acquire refcount on tpl. */
980 	}
981 	TPL_LIST_WUNLOCK();
982 
983 	return (error);
984 }
985 
986 int
987 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
988 {
989 	int i, tpl_id;
990 
991 	tpl_id = -ESRCH;
992 
993 	TPL_LIST_RLOCK();
994 	for (i = ntpl - 1; i >= 0; i--) {
995 		if (name != NULL) {
996 			if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
997 			    strncmp(name, tpllist[i]->mb->tplname,
998 			    TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
999 			    tpllist[i]->mb->tplhash)) {
1000 				tpl_id = i;
1001 				break;
1002 			}
1003 		} else if (hash == tpllist[i]->mb->tplhash) {
1004 			tpl_id = i;
1005 			break;
1006 		}
1007 	}
1008 	TPL_LIST_RUNLOCK();
1009 
1010 	return (tpl_id);
1011 }
1012 
1013 int
1014 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1015 {
1016 	int error;
1017 
1018 	error = 0;
1019 
1020 	TPL_LIST_RLOCK();
1021 	if (tpl_id < ntpl) {
1022 		if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1023 			strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1024 		else
1025 			error = EOVERFLOW;
1026 	} else
1027 		error = ENOENT;
1028 	TPL_LIST_RUNLOCK();
1029 
1030 	return (error);
1031 }
1032 
1033 int
1034 stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
1035     void *seed_bytes, size_t seed_len)
1036 {
1037 	uint32_t cum_pct, rnd_pct;
1038 	int i;
1039 
1040 	cum_pct = 0;
1041 
1042 	/*
1043 	 * Choose a pseudorandom or seeded number in range [0,100] and use
1044 	 * it to make a sampling decision and template selection where required.
1045 	 * If no seed is supplied, a PRNG is used to generate a pseudorandom
1046 	 * number so that every selection is independent. If a seed is supplied,
1047 	 * the caller desires random selection across different seeds, but
1048 	 * deterministic selection given the same seed. This is achieved by
1049 	 * hashing the seed and using the hash as the random number source.
1050 	 *
1051 	 * XXXLAS: Characterise hash function output distribution.
1052 	 */
1053 	if (seed_bytes == NULL)
1054 		rnd_pct = random() / (INT32_MAX / 100);
1055 	else
1056 		rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
1057 		    (UINT32_MAX / 100U);
1058 
1059 	/*
1060 	 * We map the randomly selected percentage on to the interval [0,100]
1061 	 * consisting of the cumulatively summed template sampling percentages.
1062 	 * The difference between the cumulative sum of all template sampling
1063 	 * percentages and 100 is treated as a NULL assignment i.e. no stats
1064 	 * template will be assigned, and -1 returned instead.
1065 	 */
1066 	for (i = 0; i < nrates; i++) {
1067 		cum_pct += rates[i].tpl_sample_pct;
1068 
1069 		KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
1070 		    cum_pct));
1071 		if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
1072 			continue;
1073 
1074 		return (rates[i].tpl_slot_id);
1075 	}
1076 
1077 	return (-1);
1078 }
1079 
/*
 * Clone the blob "src" into "*dst", preserving dst's own maxsz field.
 * With SB_CLONE_ALLOCDST, a dst blob of exactly src->cursz bytes is
 * allocated here (mutually exclusive with the SB_CLONE_USRDST* flags,
 * which direct the copy at a user-space dst via copyout variants).
 * Returns 0 on success, EINVAL on bad args/flags, ENOMEM on allocation
 * failure, or EOVERFLOW if dst could only hold src's header.
 */
int
stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
    struct statsblobv1 *src, uint32_t flags)
{
	int error;

	error = 0;

	if (src == NULL || dst == NULL ||
	    src->cursz < sizeof(struct statsblob) ||
	    ((flags & SB_CLONE_ALLOCDST) &&
	    (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
		error = EINVAL;
	} else if (flags & SB_CLONE_ALLOCDST) {
		*dst = stats_realloc(NULL, 0, src->cursz, 0);
		if (*dst)
			(*dst)->maxsz = dstmaxsz = src->cursz;
		else
			error = ENOMEM;
	} else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
		error = EINVAL;
	}

	if (!error) {
		size_t postcurszlen;

		/*
		 * Clone src into dst except for the maxsz field. If dst is too
		 * small to hold all of src, only copy src's header and return
		 * EOVERFLOW.
		 */
		/* Phase 1: copy all fields preceding maxsz. */
#ifdef _KERNEL
		if (flags & SB_CLONE_USRDSTNOFAULT)
			copyout_nofault(src, *dst,
			    offsetof(struct statsblob, maxsz));
		else if (flags & SB_CLONE_USRDST)
			copyout(src, *dst, offsetof(struct statsblob, maxsz));
		else
#endif
			memcpy(*dst, src, offsetof(struct statsblob, maxsz));

		/* Phase 2: copy from cursz onwards, skipping dst->maxsz. */
		if (dstmaxsz >= src->cursz) {
			postcurszlen = src->cursz -
			    offsetof(struct statsblob, cursz);
		} else {
			error = EOVERFLOW;
			postcurszlen = sizeof(struct statsblob) -
			    offsetof(struct statsblob, cursz);
		}
#ifdef _KERNEL
		if (flags & SB_CLONE_USRDSTNOFAULT)
			copyout_nofault(&(src->cursz), &((*dst)->cursz),
			    postcurszlen);
		else if (flags & SB_CLONE_USRDST)
			copyout(&(src->cursz), &((*dst)->cursz), postcurszlen);
		else
#endif
			memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
	}

	return (error);
}
1142 
/*
 * Allocate a new, empty v1 template named "name" and append it to the
 * global template list. Returns the new template's list index on success,
 * or a negated errno: -EINVAL for an over-long name, -EEXIST if a template
 * with that name already exists, -ENOMEM on allocation failure.
 */
int
stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
{
	struct statsblobv1_tpl *tpl, **newtpllist;
	struct statsblobv1 *tpl_sb;
	struct metablob *tpl_mb;
	int tpl_id;

	if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
		return (-EINVAL);

	if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
		return (-EEXIST);

	/* Allocate all pieces up front; failures are unwound below. */
	tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
	tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
	tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);

	if (tpl_mb != NULL && name != NULL)
		tpl_mb->tplname = stats_strdup(name, 0);

	if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
	    tpl_mb->tplname == NULL) {
		stats_free(tpl);
		stats_free(tpl_sb);
		if (tpl_mb != NULL) {
			stats_free(tpl_mb->tplname);
			stats_free(tpl_mb);
		}
		return (-ENOMEM);
	}

	tpl->mb = tpl_mb;
	tpl->sb = tpl_sb;

	/* Initialise the template's empty blob header. */
	tpl_sb->abi = STATS_ABI_V1;
	tpl_sb->endian =
#if BYTE_ORDER == LITTLE_ENDIAN
	    SB_LE;
#elif BYTE_ORDER == BIG_ENDIAN
	    SB_BE;
#else
	    SB_UE;
#endif
	tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
	tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);

	/* Grow the template list by one slot and publish the template. */
	TPL_LIST_WLOCK();
	newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
	    (ntpl + 1) * sizeof(void *), 0);
	if (newtpllist != NULL) {
		tpl_id = ntpl++;
		tpllist = (struct statsblob_tpl **)newtpllist;
		tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
		stats_tpl_update_hash(tpllist[tpl_id]);
	} else {
		/* List expansion failed; unwind the allocations above. */
		stats_free(tpl);
		stats_free(tpl_sb);
		if (tpl_mb != NULL) {
			stats_free(tpl_mb->tplname);
			stats_free(tpl_mb);
		}
		tpl_id = -ENOMEM;
	}
	TPL_LIST_WUNLOCK();

	return (tpl_id);
}
1211 
/*
 * Add a VOI with id voi_id/name voi_name and its nvss voistats (described by
 * vss) to template tpl_id, expanding the template blob as required. Adding
 * stats to an already-existing VOI is not yet supported and fails with
 * EOPNOTSUPP. Returns 0 on success or EINVAL/ENOMEM/EFBIG on failure.
 */
int
stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
    enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
    uint32_t flags)
{
	struct voi *voi;
	struct voistat *tmpstat;
	struct statsblobv1 *tpl_sb;
	struct metablob *tpl_mb;
	int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
	    newvoistatdatabytes, newvoistatmaxid;
	uint32_t nbytes;

	if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
	    nvss == 0 || vss == NULL)
		return (EINVAL);

	error = nbytes = newvoibytes = newvoistatbytes =
	    newvoistatdatabytes = 0;
	newvoistatmaxid = -1;

	/* Calculate the number of bytes required for the new voistats. */
	for (i = nvss - 1; i >= 0; i--) {
		if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
		    vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
		    vss[i].iv == NULL || vss[i].vsdsz == 0)
			return (EINVAL);
		if ((int)vss[i].stype > newvoistatmaxid)
			newvoistatmaxid = vss[i].stype;
		newvoistatdatabytes += vss[i].vsdsz;
	}

	if (flags & SB_VOI_RELUPDATE) {
		/* XXXLAS: VOI state bytes may need to vary based on stat types. */
		newvoistatdatabytes += sizeof(struct voistatdata_voistate);
	}
	nbytes += newvoistatdatabytes;

	TPL_LIST_WLOCK();
	if (tpl_id < ntpl) {
		tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
		tpl_mb = tpllist[tpl_id]->mb;

		if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
			/* Adding a new VOI and associated stats. */
			if (voi_id >= NVOIS(tpl_sb)) {
				/* We need to grow the tpl_sb->vois array. */
				newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
				    sizeof(struct voi);
				nbytes += newvoibytes;
			}
			newvoistatbytes =
			    (newvoistatmaxid + 1) * sizeof(struct voistat);
		} else {
			/* Adding stats to an existing VOI. */
			if (newvoistatmaxid >
			    tpl_sb->vois[voi_id].voistatmaxid) {
				newvoistatbytes = (newvoistatmaxid -
				    tpl_sb->vois[voi_id].voistatmaxid) *
				    sizeof(struct voistat);
			}
			/* XXXLAS: KPI does not yet support expanding VOIs. */
			error = EOPNOTSUPP;
		}
		nbytes += newvoistatbytes;

		if (!error && newvoibytes > 0) {
			/* Grow the VOI metadata array to cover voi_id. */
			struct voi_meta *voi_meta = tpl_mb->voi_meta;

			voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
			    0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
			    (1 + voi_id) * sizeof(struct voi_meta),
			    M_ZERO);

			if (voi_meta == NULL)
				error = ENOMEM;
			else
				tpl_mb->voi_meta = voi_meta;
		}

		if (!error) {
			/* NB: Resizing can change where tpl_sb points. */
			error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
			    newvoistatbytes, newvoistatdatabytes);
		}

		if (!error) {
			tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
			    0);
			if (tpl_mb->voi_meta[voi_id].name == NULL)
				error = ENOMEM;
		}

		if (!error) {
			/* Update the template list with the resized pointer. */
			tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;

			/* Update the template. */
			voi = &tpl_sb->vois[voi_id];

			if (voi->id < 0) {
				/* VOI is new and needs to be initialised. */
				voi->id = voi_id;
				voi->dtype = voi_dtype;
				voi->stats_off = tpl_sb->stats_off;
				if (flags & SB_VOI_RELUPDATE)
					voi->flags |= VOI_REQSTATE;
			} else {
				/*
				 * XXXLAS: When this else block is written, the
				 * "KPI does not yet support expanding VOIs"
				 * error earlier in this function can be
				 * removed. What is required here is to shuffle
				 * the voistat array such that the new stats for
				 * the voi are contiguous, which will displace
				 * stats for other vois that reside after the
				 * voi being updated. The other vois then need
				 * to have their stats_off adjusted post
				 * shuffle.
				 */
			}

			voi->voistatmaxid = newvoistatmaxid;
			newstatdataidx = 0;

			if (voi->flags & VOI_REQSTATE) {
				/* Initialise the voistate stat in slot 0. */
				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
				tmpstat->stype = VS_STYPE_VOISTATE;
				tmpstat->flags = 0;
				tmpstat->dtype = VSD_DTYPE_VOISTATE;
				newstatdataidx = tmpstat->dsz =
				    sizeof(struct voistatdata_numeric);
				tmpstat->data_off = tpl_sb->statsdata_off;
			}

			/* Populate a voistat slot per requested spec. */
			for (i = 0; (uint32_t)i < nvss; i++) {
				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
				    (vss[i].stype * sizeof(struct voistat)));
				KASSERT(tmpstat->stype < 0, ("voistat %p "
				    "already initialised", tmpstat));
				tmpstat->stype = vss[i].stype;
				tmpstat->flags = vss[i].flags;
				tmpstat->dtype = vss[i].vs_dtype;
				tmpstat->dsz = vss[i].vsdsz;
				tmpstat->data_off = tpl_sb->statsdata_off +
				    newstatdataidx;
				memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
				    vss[i].iv, vss[i].vsdsz);
				newstatdataidx += vss[i].vsdsz;
			}

			/* Update the template version hash. */
			stats_tpl_update_hash(tpllist[tpl_id]);
			/* XXXLAS: Confirm tpl name/hash pair remains unique. */
		}
	} else
		error = EINVAL;
	TPL_LIST_WUNLOCK();

	return (error);
}
1374 
1375 struct statsblobv1 *
1376 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1377 {
1378 	struct statsblobv1 *sb;
1379 	int error;
1380 
1381 	sb = NULL;
1382 
1383 	TPL_LIST_RLOCK();
1384 	if (tpl_id < ntpl) {
1385 		sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1386 		if (sb != NULL) {
1387 			sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1388 			error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1389 		} else
1390 			error = ENOMEM;
1391 
1392 		if (error) {
1393 			stats_free(sb);
1394 			sb = NULL;
1395 		}
1396 	}
1397 	TPL_LIST_RUNLOCK();
1398 
1399 	return (sb);
1400 }
1401 
/*
 * Release a stats blob obtained from stats_v1_blob_alloc(). Simply hands
 * the blob to stats_free().
 */
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{

	stats_free(sb);
}
1408 
1409 int
1410 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1411     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1412     size_t *retvsdsz)
1413 {
1414 	struct voi *v;
1415 	struct voistat *vs;
1416 
1417 	if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1418 	    voi_id >= NVOIS(sb))
1419 		return (EINVAL);
1420 
1421 	v = &sb->vois[voi_id];
1422 	if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1423 		return (EINVAL);
1424 
1425 	vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1426 	*retvsd = BLOB_OFFSET(sb, vs->data_off);
1427 	if (retdtype != NULL)
1428 		*retdtype = vs->dtype;
1429 	if (retvsdsz != NULL)
1430 		*retvsdsz = vs->dsz;
1431 
1432 	return (0);
1433 }
1434 
1435 int
1436 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1437 {
1438 	int error;
1439 
1440 	error = 0;
1441 
1442 	TPL_LIST_RLOCK();
1443 	if (sb == NULL || tpl_id >= ntpl) {
1444 		error = EINVAL;
1445 	} else {
1446 		error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1447 	}
1448 	TPL_LIST_RUNLOCK();
1449 
1450 	return (error);
1451 }
1452 
1453 static inline int
1454 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1455     uint32_t flags __unused)
1456 {
1457 	int error;
1458 
1459 	TPL_LIST_RLOCK_ASSERT();
1460 	error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1461 	KASSERT(!error,
1462 	    ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1463 
1464 	if (!error) {
1465 		memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1466 		sb->created = sb->lastrst = stats_sbinuptime();
1467 		sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1468 	}
1469 
1470 	return (error);
1471 }
1472 
/*
 * Grow blob *sbpp to accommodate newvoibytes of additional voi structs,
 * newvoistatbytes of additional voistat structs and newvoistatdatabytes of
 * additional stat data, reallocating if needed (which may move the blob;
 * *sbpp is updated). Existing regions are shuffled to open gaps for the
 * new entries, which are marked uninitialised (id/stype == -1). Returns 0
 * on success, EINVAL for misaligned byte counts, EFBIG if the result would
 * exceed SB_V1_MAXSZ, or ENOMEM on allocation failure.
 */
static int
stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes)
{
	struct statsblobv1 *sb;
	struct voi *tmpvoi;
	struct voistat *tmpvoistat, *voistat_array;
	int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;

	KASSERT(newvoibytes % sizeof(struct voi) == 0,
	    ("Bad newvoibytes %d", newvoibytes));
	KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
	    ("Bad newvoistatbytes %d", newvoistatbytes));

	error = ((newvoibytes % sizeof(struct voi) == 0) &&
	    (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
	sb = *sbpp;
	nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;

	/*
	 * XXXLAS: Required until we gain support for flags which alter the
	 * units of size/offset fields in key structs.
	 */
	if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
		error = EFBIG;

	if (!error && (sb->cursz + nbytes > sb->maxsz)) {
		/* Need to expand our blob. */
		sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
		if (sb != NULL) {
			sb->maxsz = sb->cursz + nbytes;
			*sbpp = sb;
		} else
		    error = ENOMEM;
	}

	if (!error) {
		/*
		 * Shuffle memory within the expanded blob working from the end
		 * backwards, leaving gaps for the new voistat and voistatdata
		 * structs at the beginning of their respective blob regions,
		 * and for the new voi structs at the end of their blob region.
		 */
		memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
		    BLOB_OFFSET(sb, sb->statsdata_off),
		    sb->cursz - sb->statsdata_off);
		memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
		    newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
		    sb->statsdata_off - sb->stats_off);

		/* First index of new voi/voistat structs to be initialised. */
		idxnewvois = NVOIS(sb);
		idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;

		/* Update housekeeping variables and offsets. */
		sb->cursz += nbytes;
		sb->stats_off += newvoibytes;
		sb->statsdata_off += newvoibytes + newvoistatbytes;

		/* XXXLAS: Zeroing not strictly needed but aids debugging. */
		memset(&sb->vois[idxnewvois], '\0', newvoibytes);
		memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
		    newvoistatbytes);
		memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
		    newvoistatdatabytes);

		/* Initialise new voi array members and update offsets. */
		for (i = 0; i < NVOIS(sb); i++) {
			tmpvoi = &sb->vois[i];
			if (i >= idxnewvois) {
				/* New slot: mark uninitialised. */
				tmpvoi->id = tmpvoi->voistatmaxid = -1;
			} else if (tmpvoi->id > -1) {
				/* Existing voi: account for inserted bytes. */
				tmpvoi->stats_off += newvoibytes +
				    newvoistatbytes;
			}
		}

		/* Initialise new voistat array members and update offsets. */
		nvoistats = (sb->statsdata_off - sb->stats_off) /
		    sizeof(struct voistat);
		voistat_array = BLOB_OFFSET(sb, sb->stats_off);
		for (i = 0; i < nvoistats; i++) {
			tmpvoistat = &voistat_array[i];
			if (i <= idxnewvoistats) {
				/* New slot: mark uninitialised. */
				tmpvoistat->stype = -1;
			} else if (tmpvoistat->stype > -1) {
				/* Existing stat: its data moved by nbytes. */
				tmpvoistat->data_off += nbytes;
			}
		}
	}

	return (error);
}
1566 
/*
 * Perform any final processing required before a blob is considered
 * complete. Currently an empty placeholder.
 */
static void
stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
{

	/* XXXLAS: Fill this in. */
}
1573 
/*
 * Iterate over every voi (and each voi's voistats) in blob sb, invoking the
 * callback icb for each. Iteration stops early if the callback returns
 * non-zero. SB_IT_NULLVOI/SB_IT_NULLVOISTAT in flags request callbacks for
 * uninitialised vois/voistats too. ctx.flags communicates first/last
 * voi/voistat/callback positions to the callback.
 */
static void
stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
    void *usrctx, uint32_t flags)
{
	struct voi *v;
	struct voistat *vs;
	struct sb_iter_ctx ctx;
	int i, j, firstvoi;

	ctx.usrctx = usrctx;
	ctx.flags = SB_IT_FIRST_CB;
	firstvoi = 1;

	for (i = 0; i < NVOIS(sb); i++) {
		v = &sb->vois[i];
		ctx.vslot = i;
		ctx.vsslot = -1;
		ctx.flags |= SB_IT_FIRST_VOISTAT;

		/*
		 * NOTE(review): firstvoi is only cleared in the NULL-voi
		 * callback branch below, never after a voistat callback —
		 * confirm whether SB_IT_FIRST_VOI is intended to persist
		 * across subsequent vois in that case.
		 */
		if (firstvoi)
			ctx.flags |= SB_IT_FIRST_VOI;
		else if (i == (NVOIS(sb) - 1))
			ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;

		if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
			if (icb(sb, v, NULL, &ctx))
				return;
			firstvoi = 0;
			ctx.flags &= ~SB_IT_FIRST_CB;
		}

		/* If NULL voi, v->voistatmaxid == -1 */
		for (j = 0; j <= v->voistatmaxid; j++) {
			vs = &((struct voistat *)BLOB_OFFSET(sb,
			    v->stats_off))[j];
			if (vs->stype < 0 &&
			    !(flags & SB_IT_NULLVOISTAT))
				continue;

			/* Flag the final callback of the final voi. */
			if (j == v->voistatmaxid) {
				ctx.flags |= SB_IT_LAST_VOISTAT;
				if (i == (NVOIS(sb) - 1))
					ctx.flags |=
					    SB_IT_LAST_CB;
			} else
				ctx.flags &= ~SB_IT_LAST_CB;

			ctx.vsslot = j;
			if (icb(sb, v, vs, &ctx))
				return;

			ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
			    SB_IT_LAST_VOISTAT);
		}
		ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
	}
}
1631 
/*
 * Render a t-digest stat's data into buf as either freeform text or JSON,
 * depending on fmt. With objdump set, centroids are walked in array order
 * (including unused slots) and annotated with their array index; otherwise
 * they are walked in tree (ascending mu) order. Handles both the 32- and
 * 64-bit clustering t-digest variants.
 */
static inline void
stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
    const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
    size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
	const struct ctdth32 *ctd32tree;
	const struct ctdth64 *ctd64tree;
	const struct voistatdata_tdgstctd32 *ctd32;
	const struct voistatdata_tdgstctd64 *ctd64;
	const char *fmtstr;
	uint64_t smplcnt, compcnt;
	int is32bit, qmaxstrlen;
	uint16_t maxctds, curctds;

	/*
	 * Pull out the digest's summary fields and the first centroid to
	 * visit; the unused tree/centroid pointers for the other width are
	 * NULLed so the loop below can branch on is32bit alone.
	 */
	switch (tdgst_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
		compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
		ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
		ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
		    ARB_CMIN(ctdth32, ctd32tree));
		/* Worst-case string length for rendering a centroid's mu. */
		qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
		compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
		ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
		ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
		    ARB_CMIN(ctdth64, ctd64tree));
		qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
		break;
	default:
		return;
	}

	/* Digest summary header. */
	switch (fmt) {
	case SB_STRFMT_FREEFORM:
		fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
		break;
	case SB_STRFMT_JSON:
	default:
		fmtstr =
		    "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
		    "\"nctds\":%hu,\"ctds\":[";
		break;
	}
	sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
	    maxctds, curctds);

	/* Walk the centroids, rendering "{mu=..., cnt=...}" per centroid. */
	while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
		char qstr[qmaxstrlen];

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "\n\t\t\t\t";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "{";
			break;
		}
		sbuf_cat(buf, fmtstr);

		if (objdump) {
			/* Annotate with the centroid's array index. */
			switch (fmt) {
			case SB_STRFMT_FREEFORM:
				fmtstr = "ctd[%hu].";
				break;
			case SB_STRFMT_JSON:
			default:
				fmtstr = "\"ctd\":%hu,";
				break;
			}
			sbuf_printf(buf, fmtstr, is32bit ?
			    ARB_SELFIDX(ctd32tree, ctd32) :
			    ARB_SELFIDX(ctd64tree, ctd64));
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "{mu=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"mu\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
		    sizeof(qstr));
		sbuf_cat(buf, qstr);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
			break;
		}
		sbuf_printf(buf, fmtstr,
		    is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);

		/* Advance: next array slot (objdump) or tree successor. */
		if (is32bit)
			ctd32 = (objdump ? ARB_CNODE(ctd32tree,
			    ARB_SELFIDX(ctd32tree, ctd32) + 1) :
			    ARB_CNEXT(ctdth32, ctd32tree, ctd32));
		else
			ctd64 = (objdump ? ARB_CNODE(ctd64tree,
			    ARB_SELFIDX(ctd64tree, ctd64) + 1) :
			    ARB_CNEXT(ctdth64, ctd64tree, ctd64));

		if (fmt == SB_STRFMT_JSON &&
		    (is32bit ? NULL != ctd32 : NULL != ctd64))
			sbuf_putc(buf, ',');
	}
	if (fmt == SB_STRFMT_JSON)
		sbuf_cat(buf, "]");
}
1762 
/*
 * Render a histogram stat's data into buf as either freeform text or JSON,
 * depending on fmt. Handles the 32- and 64-bit variants of the continuous-
 * range (crhist), discrete-range (drhist) and discrete-value (dvhist)
 * histogram types. With objdump set, each bucket is annotated with its
 * index. Bucket bounds are rendered via stats_voistatdata_tostr() using
 * the VOI's own data type.
 */
static inline void
stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
    const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
    size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
{
	const struct voistatdata_numeric *bkt_lb, *bkt_ub;
	const char *fmtstr;
	int is32bit;
	uint16_t i, nbkts;

	/* Derive the bucket count from the stat data size. */
	switch (hist_dtype) {
	case VSD_DTYPE_CRHIST32:
		nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_DRHIST32:
		nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_DVHIST32:
		nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
		is32bit = 1;
		break;
	case VSD_DTYPE_CRHIST64:
		nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
		is32bit = 0;
		break;
	case VSD_DTYPE_DRHIST64:
		nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
		is32bit = 0;
		break;
	case VSD_DTYPE_DVHIST64:
		nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
		is32bit = 0;
		break;
	default:
		return;
	}

	switch (fmt) {
	case SB_STRFMT_FREEFORM:
		fmtstr = "nbkts=%hu, ";
		break;
	case SB_STRFMT_JSON:
	default:
		fmtstr = "\"nbkts\":%hu,";
		break;
	}
	sbuf_printf(buf, fmtstr, nbkts);

	/* Render the out-of-bounds counter, then open the bucket list. */
	switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
			    "\"oob\":%ju,\"bkts\":[");
			break;
	}
	sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
	    hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
	    hist_dtype, oob));

	for (i = 0; i < nbkts; i++) {
		/* Locate this bucket's lower/upper bound per histogram type. */
		switch (hist_dtype) {
		case VSD_DTYPE_CRHIST32:
		case VSD_DTYPE_CRHIST64:
			/* Continuous: ub is the next bucket's lb. */
			bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].lb);
			if (i < nbkts - 1)
				bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
				    hist_dtype, bkts[i + 1].lb);
			else
				bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
			break;
		case VSD_DTYPE_DRHIST32:
		case VSD_DTYPE_DRHIST64:
			/* Discrete range: explicit lb and ub per bucket. */
			bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].lb);
			bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
			    bkts[i].ub);
			break;
		case VSD_DTYPE_DVHIST32:
		case VSD_DTYPE_DVHIST64:
			/* Discrete value: single value acts as both bounds. */
			bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
			    hist_dtype, bkts[i].val);
			break;
		default:
			break;
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "\n\t\t\t\t";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "{";
			break;
		}
		sbuf_cat(buf, fmtstr);

		if (objdump) {
			switch (fmt) {
			case SB_STRFMT_FREEFORM:
				fmtstr = "bkt[%hu].";
				break;
			case SB_STRFMT_JSON:
			default:
				fmtstr = "\"bkt\":%hu,";
				break;
			}
			sbuf_printf(buf, fmtstr, i);
		}

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "{lb=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"lb\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
		    fmt, buf, objdump);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = ",ub=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = ",\"ub\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
		    fmt, buf, objdump);

		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
			break;
		}
		sbuf_printf(buf, fmtstr, is32bit ?
		    VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
		    (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
		    bkts[i].cnt));

		if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
			sbuf_putc(buf, ',');
	}
	if (fmt == SB_STRFMT_JSON)
		sbuf_cat(buf, "]");
}
1927 
/*
 * Render the stat data vsd (of type vsd_dtype, size vsd_sz, belonging to a
 * VOI of type voi_dtype) into buf as freeform text or JSON per fmt.
 * Dispatches to the histogram/t-digest renderers for composite types and
 * renders numeric/Q types inline. Returns EINVAL on invalid arguments,
 * otherwise the sbuf's error state.
 */
int
stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
    enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
    struct sbuf *buf, int objdump)
{
	const char *fmtstr;

	if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
	    vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
		return (EINVAL);

	switch (vsd_dtype) {
	case VSD_DTYPE_VOISTATE:
		switch (fmt) {
		case SB_STRFMT_FREEFORM:
			fmtstr = "prev=";
			break;
		case SB_STRFMT_JSON:
		default:
			fmtstr = "\"prev\":";
			break;
		}
		sbuf_cat(buf, fmtstr);
		/*
		 * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
		 */
		stats_voistatdata_tostr(
		    (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
		    voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
		break;
	case VSD_DTYPE_INT_S32:
		sbuf_printf(buf, "%d", vsd->int32.s32);
		break;
	case VSD_DTYPE_INT_U32:
		sbuf_printf(buf, "%u", vsd->int32.u32);
		break;
	case VSD_DTYPE_INT_S64:
		sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
		break;
	case VSD_DTYPE_INT_U64:
		sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
		break;
	case VSD_DTYPE_INT_SLONG:
		sbuf_printf(buf, "%ld", vsd->intlong.slong);
		break;
	case VSD_DTYPE_INT_ULONG:
		sbuf_printf(buf, "%lu", vsd->intlong.ulong);
		break;
	/* Q types: render via Q_TOSTR into a worst-case-sized VLA. */
	case VSD_DTYPE_Q_S32:
		{
		char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
		Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_U32:
		{
		char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
		Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_S64:
		{
		char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
		Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_Q_U64:
		{
		char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
		Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
		sbuf_cat(buf, qstr);
		}
		break;
	case VSD_DTYPE_CRHIST32:
	case VSD_DTYPE_DRHIST32:
	case VSD_DTYPE_DVHIST32:
	case VSD_DTYPE_CRHIST64:
	case VSD_DTYPE_DRHIST64:
	case VSD_DTYPE_DVHIST64:
		stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
		    vsd_dtype, vsd_sz, fmt, buf, objdump);
		break;
	case VSD_DTYPE_TDGSTCLUST32:
	case VSD_DTYPE_TDGSTCLUST64:
		stats_voistatdata_tdgst_tostr(voi_dtype,
		    CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
		    objdump);
		break;
	default:
		break;
	}

	return (sbuf_error(buf));
}
2025 
/*
 * Blob iterator callback which renders blob state as human-readable freeform
 * text into the sbuf carried in the sb_tostrcb_ctx user context.  Emits a
 * blob-level header on the first callback, a per-VOI header on the first stat
 * of each VOI, and then one line per VOI stat.  With SB_TOSTR_OBJDUMP set,
 * additionally dumps raw on-blob header fields and empty slots.
 */
static void
stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx)
{
	struct sb_tostrcb_ctx *sctx;
	struct metablob *tpl_mb;
	struct sbuf *buf;
	void *vsd;
	uint8_t dump;

	sctx = ctx->usrctx;
	buf = sctx->buf;
	/* Template metadata is optional; only present if the caller fetched a
	 * template (sctx->tpl != NULL). */
	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);

	if (ctx->flags & SB_IT_FIRST_CB) {
		/* First callback for this blob: emit the blob-level header. */
		sbuf_printf(buf, "struct statsblobv1@%p", sb);
		if (dump) {
			sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
			    "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
			    "stats_off=%hu, statsdata_off=%hu",
			    sb->abi, sb->endian, sb->maxsz, sb->cursz,
			    sb->created, sb->lastrst, sb->flags, sb->stats_off,
			    sb->statsdata_off);
		}
		sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
	}

	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
		/* First stat of a VOI: emit the per-VOI header line. */
		sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
		/* A negative id marks an empty VOI slot; nothing more to
		 * print for it. */
		if (v->id < 0)
			return;
		sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
		    tpl_mb->voi_meta[v->id].name);
		if (dump)
		    sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
		    "voistatmaxid=%hhd, stats_off=%hu", v->flags,
		    vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
	}

	/* Skip empty/internal stat slots unless dumping raw object state. */
	if (!dump && vs->stype <= 0)
		return;

	sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
	if (vs->stype < 0) {
		/* Empty stat slot (only reached when dumping). */
		sbuf_printf(buf, "%hhd", vs->stype);
		return;
	} else
		sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
		    vs->errs);
	vsd = BLOB_OFFSET(sb, vs->data_off);
	if (dump)
		sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
		    "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
		    vs->dsz, vs->data_off);

	/* Render the stat's data payload via the common tostr helper. */
	sbuf_printf(buf, "\n\t\t\tvoistatdata: ");
	stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
	    sctx->fmt, buf, dump);
}
2086 
/*
 * Blob iterator callback which renders blob state as a JSON object into the
 * sbuf carried in the sb_tostrcb_ctx user context.  Overall shape is
 * {"tplname":...,"tplhash":...,"vois":{<voi>:{"stats":{<stat>:...}}}}.
 * With SB_TOSTR_OBJDUMP set, raw header fields are also emitted and
 * slot-index keys of the form "[n]" are used so empty slots can be
 * represented; otherwise template names are used as keys when metadata is
 * available.
 */
static void
stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
    struct sb_iter_ctx *ctx)
{
	struct sb_tostrcb_ctx *sctx;
	struct metablob *tpl_mb;
	struct sbuf *buf;
	const char *fmtstr;
	void *vsd;
	uint8_t dump;

	sctx = ctx->usrctx;
	buf = sctx->buf;
	/* Template metadata is optional; NULL when no template was fetched. */
	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);

	if (ctx->flags & SB_IT_FIRST_CB) {
		/* First callback: open the blob object and emit header
		 * key/value pairs. */
		sbuf_putc(buf, '{');
		if (dump) {
			sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
			    "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
			    "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
			    "\"statsdata_off\":%hu,", sb->abi,
			    sb->endian, sb->maxsz, sb->cursz, sb->created,
			    sb->lastrst, sb->flags, sb->stats_off,
			    sb->statsdata_off);
		}

		/* Without metadata the name is the unquoted literal "null". */
		if (tpl_mb == NULL)
			fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
		else
			fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";

		sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
		    sb->tplhash);
	}

	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
		/* First stat of a VOI: open the per-VOI object. */
		if (dump) {
			sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
			    v->id);
			/* Negative id marks an empty VOI slot; close it and
			 * move on. */
			if (v->id < 0) {
				sbuf_printf(buf, "},");
				return;
			}

			if (tpl_mb == NULL)
				fmtstr = ",\"name\":%s,\"flags\":%hu,"
				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
				    "\"stats_off\":%hu,";
			else
				fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
				    "\"stats_off\":%hu,";

			sbuf_printf(buf, fmtstr, tpl_mb ?
			    tpl_mb->voi_meta[v->id].name : "null", v->flags,
			    vsd_dtype2name[v->dtype], v->voistatmaxid,
			    v->stats_off);
		} else {
			/* Key by name when metadata is available, otherwise
			 * by slot index. */
			if (tpl_mb == NULL) {
				sbuf_printf(buf, "\"[%hd]\":{", v->id);
			} else {
				sbuf_printf(buf, "\"%s\":{",
				    tpl_mb->voi_meta[v->id].name);
			}
		}
		sbuf_cat(buf, "\"stats\":{");
	}

	vsd = BLOB_OFFSET(sb, vs->data_off);
	if (dump) {
		sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
		/* Empty stat slot. */
		if (vs->stype < 0) {
			sbuf_printf(buf, "{\"stype\":-1},");
			return;
		}
		sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
		    "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
		    vs_stype2name[vs->stype], vs->errs, vs->flags,
		    vsd_dtype2name[vs->dtype], vs->data_off);
	} else if (vs->stype > 0) {
		if (tpl_mb == NULL)
			sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
		else
			sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
	} else
		/* Internal/empty stat slot; not rendered unless dumping. */
		return;

	if ((vs->flags & VS_VSDVALID) || dump) {
		if (!dump)
			sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
		/* Simple non-compound VSD types need a key. */
		if (!vsd_compoundtype[vs->dtype])
			sbuf_cat(buf, "\"val\":");
		stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
		    sctx->fmt, buf, dump);
		sbuf_cat(buf, dump ? "}}" : "}");
	} else
		/* No valid data recorded for this stat yet. */
		sbuf_cat(buf, dump ? "null}" : "null");

	/* Close the per-VOI "stats" and VOI objects... */
	if (ctx->flags & SB_IT_LAST_VOISTAT)
		sbuf_cat(buf, "}}");

	/* ...and finally the "vois" and blob objects, or separate entries. */
	if (ctx->flags & SB_IT_LAST_CB)
		sbuf_cat(buf, "}}");
	else
		sbuf_putc(buf, ',');
}
2196 
2197 static int
2198 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2199     struct sb_iter_ctx *ctx)
2200 {
2201 	struct sb_tostrcb_ctx *sctx;
2202 
2203 	sctx = ctx->usrctx;
2204 
2205 	switch (sctx->fmt) {
2206 	case SB_STRFMT_FREEFORM:
2207 		stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2208 		break;
2209 	case SB_STRFMT_JSON:
2210 		stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2211 		break;
2212 	default:
2213 		break;
2214 	}
2215 
2216 	return (sbuf_error(sctx->buf));
2217 }
2218 
2219 int
2220 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2221     enum sb_str_fmt fmt, uint32_t flags)
2222 {
2223 	struct sb_tostrcb_ctx sctx;
2224 	uint32_t iflags;
2225 
2226 	if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2227 	    fmt >= SB_STRFMT_NUM_FMTS)
2228 		return (EINVAL);
2229 
2230 	sctx.buf = buf;
2231 	sctx.fmt = fmt;
2232 	sctx.flags = flags;
2233 
2234 	if (flags & SB_TOSTR_META) {
2235 		if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2236 		    &sctx.tpl))
2237 			return (EINVAL);
2238 	} else
2239 		sctx.tpl = NULL;
2240 
2241 	iflags = 0;
2242 	if (flags & SB_TOSTR_OBJDUMP)
2243 		iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2244 	stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2245 
2246 	return (sbuf_error(buf));
2247 }
2248 
2249 static int
2250 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2251     struct voistat *vs, struct sb_iter_ctx *ctx)
2252 {
2253 	struct sb_visitcb_ctx *vctx;
2254 	struct sb_visit sbv;
2255 
2256 	vctx = ctx->usrctx;
2257 
2258 	sbv.tplhash = sb->tplhash;
2259 	sbv.voi_id = v->id;
2260 	sbv.voi_dtype = v->dtype;
2261 	sbv.vs_stype = vs->stype;
2262 	sbv.vs_dtype = vs->dtype;
2263 	sbv.vs_dsz = vs->dsz;
2264 	sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2265 	sbv.vs_errs = vs->errs;
2266 	sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2267 	    SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2268 	    SB_IT_LAST_VOISTAT);
2269 
2270 	return (vctx->cb(&sbv, vctx->usrctx));
2271 }
2272 
2273 int
2274 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2275     void *usrctx)
2276 {
2277 	struct sb_visitcb_ctx vctx;
2278 
2279 	if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2280 		return (EINVAL);
2281 
2282 	vctx.cb = func;
2283 	vctx.usrctx = usrctx;
2284 
2285 	stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2286 
2287 	return (0);
2288 }
2289 
/*
 * Blob iterator callback which resets a single VOI stat to its initial
 * value: sums to zero, maxes to the dtype's minimum, mins to the dtype's
 * maximum, histograms to all-zero bucket/oob counts, and t-digests to an
 * empty centroid tree.  VOI state pseudo-stats are left untouched.  Also
 * clears the stat's error count and VS_VSDVALID flag.  Always returns 0 so
 * iteration continues across the whole blob.
 */
static int
stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
    struct voistat *vs, struct sb_iter_ctx *ctx __unused)
{
	void *vsd;

	if (vs->stype == VS_STYPE_VOISTATE)
		return (0);

	vsd = BLOB_OFFSET(sb, vs->data_off);

	/* Perform the stat type's default reset action. */
	switch (vs->stype) {
	case VS_STYPE_SUM:
		/* Q types use Q_SIFVAL to preserve their control bits. */
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
			break;
		default:
			/* Plain integer types are simply zeroed. */
			bzero(vsd, vs->dsz);
			break;
		}
		break;
	case VS_STYPE_MAX:
		/* Reset to the dtype's minimum so any sample replaces it. */
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMINVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMINVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMINVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMINVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	case VS_STYPE_MIN:
		/* Reset to the dtype's maximum so any sample replaces it. */
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	case VS_STYPE_HIST:
		{
		/* Reset bucket counts. */
		struct voistatdata_hist *hist;
		int i, is32bit;
		uint16_t nbkts;

		hist = VSD(hist, vsd);
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DVHIST32:
			nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_CRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DVHIST64:
			nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
			is32bit = 0;
			break;
		default:
			return (0);
		}

		/* Zero the out-of-band count, then each bucket's count;
		 * bucket bounds/values are preserved. */
		bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
		    is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
		for (i = nbkts - 1; i >= 0; i--) {
			bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
			    bkts[i].cnt), is32bit ? sizeof(uint32_t) :
			    sizeof(uint64_t));
		}
		break;
		}
	case VS_STYPE_TDGST:
		{
		/* Reset sample count centroids array/tree. */
		struct voistatdata_tdgst *tdgst;
		struct ctdth32 *ctd32tree;
		struct ctdth64 *ctd64tree;
		struct voistatdata_tdgstctd32 *ctd32;
		struct voistatdata_tdgstctd64 *ctd64;

		tdgst = VSD(tdgst, vsd);
		switch (vs->dtype) {
		case VSD_DTYPE_TDGSTCLUST32:
			VSD(tdgstclust32, tdgst)->smplcnt = 0;
			VSD(tdgstclust32, tdgst)->compcnt = 0;
			ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
			/* Re-initialise the ARB, zeroing each centroid's
			 * count and mean as it is returned to the free
			 * list. */
			ARB_INIT(ctd32, ctdlnk, ctd32tree,
			    ARB_MAXNODES(ctd32tree)) {
				ctd32->cnt = 0;
				Q_SIFVAL(ctd32->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
		case VSD_DTYPE_TDGSTCLUST64:
			VSD(tdgstclust64, tdgst)->smplcnt = 0;
			VSD(tdgstclust64, tdgst)->compcnt = 0;
			ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
			ARB_INIT(ctd64, ctdlnk, ctd64tree,
			    ARB_MAXNODES(ctd64tree)) {
				ctd64->cnt = 0;
				Q_SIFVAL(ctd64->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
		default:
			return (0);
		}
		break;
		}
	default:
		KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
		break;
	}

	/* Common post-reset bookkeeping for all stat types. */
	vs->errs = 0;
	vs->flags &= ~VS_VSDVALID;

	return (0);
}
2468 
2469 int
2470 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2471     struct statsblobv1 *src, uint32_t flags)
2472 {
2473 	int error;
2474 
2475 	if (src != NULL && src->abi == STATS_ABI_V1) {
2476 		error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2477 		if (!error) {
2478 			if (flags & SB_CLONE_RSTSRC) {
2479 				stats_v1_blob_iter(src,
2480 				    stats_v1_icb_reset_voistat, NULL, 0);
2481 				src->lastrst = stats_sbinuptime();
2482 			}
2483 			stats_v1_blob_finalise(*dst);
2484 		}
2485 	} else
2486 		error = EINVAL;
2487 
2488 	return (error);
2489 }
2490 
/*
 * Update a "max" VOI stat with a new sample: when 'voival' exceeds the
 * currently recorded maximum in 'vsd', record it and set VS_VSDVALID.
 * Returns 0 on success; EINVAL for an unhandled dtype; for Q types,
 * Q_QCPYVALQ's error is propagated and VS_VSDVALID is only set when the
 * copy succeeds.
 */
static inline int
stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		if (VSD(int32, vsd)->s32 < voival->int32.s32) {
			VSD(int32, vsd)->s32 = voival->int32.s32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U32:
		if (VSD(int32, vsd)->u32 < voival->int32.u32) {
			VSD(int32, vsd)->u32 = voival->int32.u32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_S64:
		if (VSD(int64, vsd)->s64 < voival->int64.s64) {
			VSD(int64, vsd)->s64 = voival->int64.s64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U64:
		if (VSD(int64, vsd)->u64 < voival->int64.u64) {
			VSD(int64, vsd)->u64 = voival->int64.u64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_SLONG:
		if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
			VSD(intlong, vsd)->slong = voival->intlong.slong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_ULONG:
		if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	/* Q types compare/copy via qmath macros, which can fail. */
	case VSD_DTYPE_Q_S32:
		if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
		    voival->q32.sq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U32:
		if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
		    voival->q32.uq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_S64:
		if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
		    voival->q64.sq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U64:
		if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
		    voival->q64.uq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
2574 
/*
 * Update a "min" VOI stat with a new sample: when 'voival' is below the
 * currently recorded minimum in 'vsd', record it and set VS_VSDVALID.
 * Mirror image of stats_v1_voi_update_max(); same return semantics.
 */
static inline int
stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		if (VSD(int32, vsd)->s32 > voival->int32.s32) {
			VSD(int32, vsd)->s32 = voival->int32.s32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U32:
		if (VSD(int32, vsd)->u32 > voival->int32.u32) {
			VSD(int32, vsd)->u32 = voival->int32.u32;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_S64:
		if (VSD(int64, vsd)->s64 > voival->int64.s64) {
			VSD(int64, vsd)->s64 = voival->int64.s64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_U64:
		if (VSD(int64, vsd)->u64 > voival->int64.u64) {
			VSD(int64, vsd)->u64 = voival->int64.u64;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_SLONG:
		if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
			VSD(intlong, vsd)->slong = voival->intlong.slong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_INT_ULONG:
		if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
			vs->flags |= VS_VSDVALID;
		}
		break;
	/* Q types compare/copy via qmath macros, which can fail. */
	case VSD_DTYPE_Q_S32:
		if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
		    voival->q32.sq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U32:
		if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
		    voival->q32.uq32)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_S64:
		if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
		    voival->q64.sq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	case VSD_DTYPE_Q_U64:
		if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
		    voival->q64.uq64)))) {
			vs->flags |= VS_VSDVALID;
		}
		break;
	default:
		error = EINVAL;
		break;
	}

	return (error);
}
2658 
/*
 * Update a "sum" VOI stat by accumulating a new sample into 'vsd'.  Plain
 * integer additions are unchecked; Q-type additions report overflow via
 * Q_QADDQ's return value.  VS_VSDVALID is set only when no error occurred.
 * Returns 0 on success, EINVAL for an unhandled dtype, or a Q_QADDQ error.
 */
static inline int
stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
    struct voistatdata *voival, struct voistat *vs, void *vsd)
{
	int error;

	KASSERT(vs->dtype < VSD_NUM_DTYPES,
	    ("Unknown VSD dtype %d", vs->dtype));

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_INT_S32:
		VSD(int32, vsd)->s32 += voival->int32.s32;
		break;
	case VSD_DTYPE_INT_U32:
		VSD(int32, vsd)->u32 += voival->int32.u32;
		break;
	case VSD_DTYPE_INT_S64:
		VSD(int64, vsd)->s64 += voival->int64.s64;
		break;
	case VSD_DTYPE_INT_U64:
		VSD(int64, vsd)->u64 += voival->int64.u64;
		break;
	case VSD_DTYPE_INT_SLONG:
		VSD(intlong, vsd)->slong += voival->intlong.slong;
		break;
	case VSD_DTYPE_INT_ULONG:
		VSD(intlong, vsd)->ulong += voival->intlong.ulong;
		break;
	case VSD_DTYPE_Q_S32:
		error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
		break;
	case VSD_DTYPE_Q_U32:
		error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
		break;
	case VSD_DTYPE_Q_S64:
		error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
		break;
	case VSD_DTYPE_Q_U64:
		error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (!error)
		vs->flags |= VS_VSDVALID;

	return (error);
}
2711 
/*
 * Update a histogram VOI stat with a new sample: search the buckets (from
 * the top bucket downwards) for the one matching 'voival' and increment its
 * count, or the histogram's out-of-band count when no bucket matches.
 * Bucket matching varies by histogram flavour:
 *   - CRHIST (continuous range): lower bound only (eq_only=0, has_ub=0);
 *   - DRHIST (discrete range): lower and upper bound (has_ub=1);
 *   - DVHIST (discrete value): exact value match (eq_only=1).
 * Always sets VS_VSDVALID and returns 0 on success, EINVAL for an
 * unhandled stat or VOI dtype.
 */
static inline int
stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
    struct voistat *vs, struct voistatdata_hist *hist)
{
	struct voistatdata_numeric *bkt_lb, *bkt_ub;
	uint64_t *oob64, *cnt64;
	uint32_t *oob32, *cnt32;
	int error, i, found, is32bit, has_ub, eq_only;

	error = 0;

	/* Per-flavour setup: bucket count, counter width, matching rules and
	 * the out-of-band counter.  Only the counter matching 'is32bit' is
	 * initialised; likewise bkt_ub is only set when has_ub is. */
	switch (vs->dtype) {
	case VSD_DTYPE_CRHIST32:
		i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
		is32bit = 1;
		has_ub = eq_only = 0;
		oob32 = &VSD(crhist32, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST32:
		i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
		is32bit = has_ub = 1;
		eq_only = 0;
		oob32 = &VSD(drhist32, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST32:
		i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
		is32bit = eq_only = 1;
		has_ub = 0;
		oob32 = &VSD(dvhist32, hist)->oob;
		break;
	case VSD_DTYPE_CRHIST64:
		i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
		is32bit = has_ub = eq_only = 0;
		oob64 = &VSD(crhist64, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST64:
		i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
		is32bit = eq_only = 0;
		has_ub = 1;
		oob64 = &VSD(drhist64, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST64:
		i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
		is32bit = has_ub = 0;
		eq_only = 1;
		oob64 = &VSD(dvhist64, hist)->oob;
		break;
	default:
		return (EINVAL);
	}
	i--; /* Adjust for 0-based array index. */

	/* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
	for (found = 0; i >= 0 && !found; i--) {
		/* Locate bucket i's bound(s) and counter for this flavour. */
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
			cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST32:
			bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
			cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST32:
			bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
			cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_CRHIST64:
			bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
			cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST64:
			bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
			cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST64:
			bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
			cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
			break;
		default:
			return (EINVAL);
		}

		/* Test 'voival' against bucket i using the VOI's dtype:
		 * match if lb <= val (and val == lb for eq_only, or
		 * val < ub when an upper bound exists). */
		switch (voi_dtype) {
		case VSD_DTYPE_INT_S32:
			if (voival->int32.s32 >= bkt_lb->int32.s32) {
				if ((eq_only && voival->int32.s32 ==
				    bkt_lb->int32.s32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.s32 < bkt_ub->int32.s32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_U32:
			if (voival->int32.u32 >= bkt_lb->int32.u32) {
				if ((eq_only && voival->int32.u32 ==
				    bkt_lb->int32.u32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.u32 < bkt_ub->int32.u32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_S64:
			if (voival->int64.s64 >= bkt_lb->int64.s64)
				if ((eq_only && voival->int64.s64 ==
				    bkt_lb->int64.s64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.s64 < bkt_ub->int64.s64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_U64:
			if (voival->int64.u64 >= bkt_lb->int64.u64)
				if ((eq_only && voival->int64.u64 ==
				    bkt_lb->int64.u64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.u64 < bkt_ub->int64.u64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_SLONG:
			if (voival->intlong.slong >= bkt_lb->intlong.slong)
				if ((eq_only && voival->intlong.slong ==
				    bkt_lb->intlong.slong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.slong <
				    bkt_ub->intlong.slong)))
					found = 1;
			break;
		case VSD_DTYPE_INT_ULONG:
			if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
				if ((eq_only && voival->intlong.ulong ==
				    bkt_lb->intlong.ulong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.ulong <
				    bkt_ub->intlong.ulong)))
					found = 1;
			break;
		case VSD_DTYPE_Q_S32:
			if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
				if ((eq_only && Q_QEQ(voival->q32.sq32,
				    bkt_lb->q32.sq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.sq32,
				    bkt_ub->q32.sq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U32:
			if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
				if ((eq_only && Q_QEQ(voival->q32.uq32,
				    bkt_lb->q32.uq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.uq32,
				    bkt_ub->q32.uq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_S64:
			if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
				if ((eq_only && Q_QEQ(voival->q64.sq64,
				    bkt_lb->q64.sq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.sq64,
				    bkt_ub->q64.sq64))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U64:
			if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
				if ((eq_only && Q_QEQ(voival->q64.uq64,
				    bkt_lb->q64.uq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.uq64,
				    bkt_ub->q64.uq64))))
					found = 1;
			break;
		default:
			break;
		}
	}

	/* Bump the matched bucket's count, or the out-of-band count. */
	if (found) {
		if (is32bit)
			*cnt32 += 1;
		else
			*cnt64 += 1;
	} else {
		if (is32bit)
			*oob32 += 1;
		else
			*oob64 += 1;
	}

	vs->flags |= VS_VSDVALID;
	return (error);
}
2906 
/*
 * Compress a full t-digest: reset the centroid ARB and re-insert every
 * centroid's mu/cnt via stats_v1_vsd_tdgst_add() in pseudorandom order, which
 * allows neighbouring centroids to merge and frees up capacity.  A no-op
 * (returns 0) if the tree is not yet full.  Returns EINVAL for an unhandled
 * dtype, otherwise the first error latched during re-insertion (expected 0).
 */
static inline int
stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, int attempt)
{
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;
	uint64_t ebits, idxmask;
	uint32_t bitsperidx, nebits;
	int error, idx, is32bit, maxctds, remctds, tmperr;

	error = 0;

	/* Per-width setup: bail out early unless the tree is full, then
	 * reset the tree/sample count and bump the compression counter. */
	switch (vs_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		if (!ARB_FULL(ctd32tree))
			return (0);
		VSD(tdgstclust32, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd32tree);
		ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
		VSD(tdgstclust32, tdgst)->smplcnt = 0;
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		if (!ARB_FULL(ctd64tree))
			return (0);
		VSD(tdgstclust64, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd64tree);
		ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
		VSD(tdgstclust64, tdgst)->smplcnt = 0;
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
	 * re-inserting the mu/cnt of each as a value and corresponding weight.
	 */

	/*
	 * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
	 * RAND_MAX happens to be approximately 31 bits (range [0,
	 * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
	 * the code is compiled in userspace, it gets the random(3) behavior,
	 * which has expected range [0, 0x7fffffff].
	 */
#define	bitsperrand 31
	ebits = 0;
	nebits = 0;
	bitsperidx = fls(maxctds);
	KASSERT(bitsperidx <= sizeof(ebits) << 3,
	    ("%s: bitsperidx=%d, ebits=%d",
	    __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
	idxmask = (UINT64_C(1) << bitsperidx) - 1;

	/* Initialise the free list with randomised centroid indices. */
	for (; remctds > 0; remctds--) {
		/* Top up the entropy pool with random() bits until we have
		 * enough for one index. */
		while (nebits < bitsperidx) {
			ebits |= ((uint64_t)random()) << nebits;
			nebits += bitsperrand;
			if (nebits > (sizeof(ebits) << 3))
				nebits = sizeof(ebits) << 3;
		}
		idx = ebits & idxmask;
		nebits -= bitsperidx;
		ebits >>= bitsperidx;

		/*
		 * Select the next centroid to put on the ARB free list. We
		 * start with the centroid at our randomly selected array index,
		 * and work our way forwards until finding one (the latter
		 * aspect reduces re-insertion randomness, but is good enough).
		 */
		do {
			if (idx >= maxctds)
				idx %= maxctds;

			if (is32bit)
				ctd32 = ARB_NODE(ctd32tree, idx);
			else
				ctd64 = ARB_NODE(ctd64tree, idx);
		} while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
		    ARB_ISFREE(ctd64, ctdlnk)) && ++idx);

		/* Put the centroid on the ARB free list. */
		if (is32bit)
			ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
		else
			ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
	}

	/*
	 * The free list now contains the randomised indices of every centroid.
	 * Walk the free list from start to end, re-inserting each centroid's
	 * mu/cnt. The tdgst_add() call may or may not consume the free centroid
	 * we re-insert values from during each loop iteration, so we must latch
	 * the index of the next free list centroid before the re-insertion
	 * call. The previous loop above should have left the centroid pointer
	 * pointing to the element at the head of the free list.
	 */
	KASSERT((is32bit ?
	    ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
	    ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
	    ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
	remctds = maxctds;
	while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
		tmperr = 0;
		if (is32bit) {
			s64q_t x;

			idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
			/* Cloning a s32q_t into a s64q_t should never fail. */
			tmperr = Q_QCLONEQ(&x, ctd32->mu);
			tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
			    vs_dtype, tdgst, x, ctd32->cnt, attempt);
			ctd32 = ARB_NODE(ctd32tree, idx);
			KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd32tree));
		} else {
			idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
			tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
			    ctd64->mu, ctd64->cnt, attempt);
			ctd64 = ARB_NODE(ctd64tree, idx);
			KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd64tree));
		}
		/*
		 * This process should not produce errors, bugs notwithstanding.
		 * Just in case, latch any errors and attempt all re-insertions.
		 */
		error = tmperr ? tmperr : error;
		remctds--;
	}

	KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));

	return (error);
}
3064 
/*
 * Insert a sample with value 'x' and count 'weight' into the t-digest
 * 'tdgst'.  'x' must already carry the same Q fractional precision as the
 * digest's centroids.  'attempt' counts insertion retries: when no centroid
 * can absorb the sample and the centroid tree is full, the digest is
 * compressed and the insertion is retried with a relaxed size limit (the
 * limit 'k' computed below scales with 'attempt').  Returns 0 on success or
 * an errno-style error (EOVERFLOW, EINVAL, EAGAIN, or a Q-math error).
 */
static inline int
stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
    s64q_t x, uint64_t weight, int attempt)
{
#ifdef DIAGNOSTIC
	char qstr[Q_MAXSTRLEN(x, 10)];
#endif
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	void *closest, *cur, *lb, *ub;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;
	uint64_t cnt, smplcnt, sum, tmpsum;
	s64q_t k, minz, q, z;
	int error, is32bit, n;

	error = 0;
	/* Give 'z' (and therefore 'minz') the same Q precision as 'x'. */
	minz = Q_INI(&z, 0, 0, Q_NFBITS(x));

	/*
	 * Latch type-specific digest state (sample count and centroid tree)
	 * and reject the insert up front if adding 'weight' would overflow
	 * the digest's sample counter.
	 */
	switch (vs_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
			error = EOVERFLOW;
		smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
			error = EOVERFLOW;
		smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	/*
	 * Inspired by Ted Dunning's AVLTreeDigest.java
	 */
	do {
#if defined(DIAGNOSTIC)
		KASSERT(attempt < 5,
		    ("%s: Too many attempts", __func__));
#endif
		if (attempt >= 5)
			return (EAGAIN);

		/* Reset the minimum distance to the largest representable
		 * value for this retry of the search. */
		Q_SIFVAL(minz, Q_IFMAXVAL(minz));
		closest = ub = NULL;
		sum = tmpsum = 0;

		if (is32bit)
			lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
		else
			lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));

		if (lb == NULL) /* Empty tree. */
			lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
			    (void *)ARB_ROOT(ctd64tree));

		/*
		 * Find the set of centroids with minimum distance to x and
		 * compute the sum of counts for all centroids with mean less
		 * than the first centroid in the set.
		 */
		for (; cur != NULL;
		    cur = (is32bit ?
		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
			if (is32bit) {
				cnt = ctd32->cnt;
				KASSERT(Q_PRECEQ(ctd32->mu, x),
				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
				    Q_RELPREC(ctd32->mu, x)));
				/* Ok to assign as both have same precision. */
				z = ctd32->mu;
			} else {
				cnt = ctd64->cnt;
				KASSERT(Q_PRECEQ(ctd64->mu, x),
				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
				    Q_RELPREC(ctd64->mu, x)));
				/* Ok to assign as both have same precision. */
				z = ctd64->mu;
			}

			/* z = |mu - x|, the centroid's distance to the sample. */
			error = Q_QSUBQ(&z, x);
#if defined(DIAGNOSTIC)
			KASSERT(!error, ("%s: unexpected error %d", __func__,
			    error));
#endif
			if (error)
				return (error);

			z = Q_QABS(z);
			if (Q_QLTQ(z, minz)) {
				minz = z;
				lb = cur;
				sum = tmpsum;
				tmpsum += cnt;
			} else if (Q_QGTQ(z, minz)) {
				/* Distances are increasing again; the
				 * minimum-distance run ends just before here. */
				ub = cur;
				break;
			}
		}

		/* Rewind to the first centroid of the minimum-distance set. */
		cur = (is32bit ?
		    (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
		    (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));

		/*
		 * Walk the minimum-distance set [lb, ub) and pick, uniformly
		 * at random, one centroid whose post-merge count would stay
		 * within the size limit 'k'.
		 */
		for (n = 0; cur != ub; cur = (is32bit ?
		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
			if (is32bit)
				cnt = ctd32->cnt;
			else
				cnt = ctd64->cnt;

			/* q: estimated quantile of this centroid's mean. */
			q = Q_CTRLINI(16);
			if (smplcnt == 1)
				error = Q_QFRACI(&q, 1, 2);
			else
				/* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
				error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
				    (smplcnt - 1) << 1);
			k = q;
			/* k = q x 4 x smplcnt x attempt */
			error |= Q_QMULI(&k, 4 * smplcnt * attempt);
			/* k = k x (1 - q) */
			error |= Q_QSUBI(&q, 1);
			q = Q_QABS(q);
			error |= Q_QMULQ(&k, q);
#if defined(DIAGNOSTIC)
#if !defined(_KERNEL)
			/* Cross-check the fixed-point q/k against a
			 * floating-point reference computation (userland-only,
			 * as the kernel cannot use doubles). */
			double q_dbl, k_dbl, q2d, k2d;
			q2d = Q_Q2D(q);
			k2d = Q_Q2D(k);
			q_dbl = smplcnt == 1 ? 0.5 :
			    (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
			k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
			/*
			 * If the difference between q and q_dbl is greater than
			 * the fractional precision of q, something is off.
			 * NB: q is holding the value of 1 - q
			 */
			q_dbl = 1.0 - q_dbl;
			KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
			    (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
			    ("Q-type q bad precision"));
			KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
			    1.0 + (0.01 * smplcnt),
			    ("Q-type k bad precision"));
#endif /* !_KERNEL */
			KASSERT(!error, ("%s: unexpected error %d", __func__,
			    error));
#endif /* DIAGNOSTIC */
			if (error)
				return (error);
			if ((is32bit && ((ctd32->cnt + weight) <=
			    (uint64_t)Q_GIVAL(k))) ||
			    (!is32bit && ((ctd64->cnt + weight) <=
			    (uint64_t)Q_GIVAL(k)))) {
				/* Reservoir-sample one eligible centroid:
				 * the n-th candidate replaces the current
				 * choice with probability 1/n. */
				n++;
				/* random() produces 31 bits. */
				if (random() < (INT32_MAX / n))
					closest = cur;
			}
			sum += cnt;
		}
	} while (closest == NULL &&
	    (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
	    (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
	    attempt++)) == 0);

	if (error)
		return (error);

	if (closest != NULL) {
		/* Merge with an existing centroid. */
		if (is32bit) {
			ctd32 = (struct voistatdata_tdgstctd32 *)closest;
			error = Q_QSUBQ(&x, ctd32->mu);
			/*
			 * The following calculation "x / (cnt + weight)"
			 * computes the amount by which to adjust the centroid's
			 * mu value in order to merge in the VOI sample.
			 *
			 * It can underflow (Q_QDIVI() returns ERANGE) when the
			 * user centroids' fractional precision (which is
			 * inherited by 'x') is too low to represent the result.
			 *
			 * A sophisticated approach to dealing with this issue
			 * would minimise accumulation of error by tracking
			 * underflow per centroid and making an adjustment when
			 * a LSB's worth of underflow has accumulated.
			 *
			 * A simpler approach is to let the result underflow
			 * i.e. merge the VOI sample into the centroid without
			 * adjusting the centroid's mu, and rely on the user to
			 * specify their t-digest with sufficient centroid
			 * fractional precision such that the accumulation of
			 * error from multiple underflows is of no material
			 * consequence to the centroid's final value of mu.
			 *
			 * For the moment, the latter approach is employed by
			 * simply ignoring ERANGE here.
			 *
			 * XXXLAS: Per-centroid underflow tracking is likely too
			 * onerous, but it probably makes sense to accumulate a
			 * single underflow error variable across all centroids
			 * and report it as part of the digest to provide
			 * additional visibility into the digest's fidelity.
			 */
			error = error ? error :
			    Q_QDIVI(&x, ctd32->cnt + weight);
			if ((error && error != ERANGE)
			    || (error = Q_QADDQ(&ctd32->mu, x))) {
#ifdef DIAGNOSTIC
				KASSERT(!error, ("%s: unexpected error %d",
				    __func__, error));
#endif
				return (error);
			}
			ctd32->cnt += weight;
			/* mu changed, so the node's tree position may too. */
			error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
			    NULL ? 0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_REINSERT(rbctdth32,
			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
#endif
		} else {
			ctd64 = (struct voistatdata_tdgstctd64 *)closest;
			error = Q_QSUBQ(&x, ctd64->mu);
			error = error ? error :
			    Q_QDIVI(&x, ctd64->cnt + weight);
			/* Refer to is32bit ERANGE discussion above. */
			if ((error && error != ERANGE)
			    || (error = Q_QADDQ(&ctd64->mu, x))) {
				KASSERT(!error, ("%s: unexpected error %d",
				    __func__, error));
				return (error);
			}
			ctd64->cnt += weight;
			error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
			    NULL ? 0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_REINSERT(rbctdth64,
			    &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
#endif
		}
	} else {
		/*
		 * Add a new centroid. If digest compression is working
		 * correctly, there should always be at least one free.
		 */
		if (is32bit) {
			ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
#ifdef DIAGNOSTIC
			KASSERT(ctd32 != NULL,
			    ("%s: t-digest@%p has no free centroids",
			    __func__, tdgst));
#endif
			if (ctd32 == NULL)
				return (EAGAIN);
			if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
				return (error);
			ctd32->cnt = weight;
			error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
			    0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_INSERT(rbctdth32,
			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
#endif
		} else {
			ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
#ifdef DIAGNOSTIC
			KASSERT(ctd64 != NULL,
			    ("%s: t-digest@%p has no free centroids",
			    __func__, tdgst));
#endif
			if (ctd64 == NULL) /* Should not happen. */
				return (EAGAIN);
			/* Direct assignment ok as both have same type/prec. */
			ctd64->mu = x;
			ctd64->cnt = weight;
			error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
			    0 : EALREADY;
#ifdef DIAGNOSTIC
			RB_INSERT(rbctdth64, &VSD(tdgstclust64,
			    tdgst)->rbctdtree, ctd64);
#endif
		}
	}

	if (is32bit)
		VSD(tdgstclust32, tdgst)->smplcnt += weight;
	else {
		VSD(tdgstclust64, tdgst)->smplcnt += weight;

#ifdef DIAGNOSTIC
		/*
		 * Sanity check the ARB tree against a regular RB tree
		 * maintained in parallel under DIAGNOSTIC: both must agree
		 * on ordering, min/max and per-node linkage, else panic
		 * after dumping the mismatching nodes.
		 */
		struct rbctdth64 *rbctdtree =
		    &VSD(tdgstclust64, tdgst)->rbctdtree;
		struct voistatdata_tdgstctd64 *rbctd64;
		int i = 0;
		ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
			rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
			    RB_NEXT(rbctdth64, rbctdtree, rbctd64));

			if (i >= ARB_CURNODES(ctd64tree)
			    || ctd64 != rbctd64
			    || ARB_MIN(ctdth64, ctd64tree) !=
			       RB_MIN(rbctdth64, rbctdtree)
			    || ARB_MAX(ctdth64, ctd64tree) !=
			       RB_MAX(rbctdth64, rbctdtree)
			    || ARB_LEFTIDX(ctd64, ctdlnk) !=
			       ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
			    || ARB_RIGHTIDX(ctd64, ctdlnk) !=
			       ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
			    || ARB_PARENTIDX(ctd64, ctdlnk) !=
			       ARB_SELFIDX(ctd64tree,
			       RB_PARENT(rbctd64, rblnk))) {
				Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
				printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
				    "mu=%s\n",
				    (int)ARB_SELFIDX(ctd64tree, ctd64),
				    ARB_PARENTIDX(ctd64, ctdlnk),
				    ARB_LEFTIDX(ctd64, ctdlnk),
				    ARB_RIGHTIDX(ctd64, ctdlnk),
				    ARB_COLOR(ctd64, ctdlnk),
				    qstr);

				Q_TOSTR(rbctd64->mu, -1, 10, qstr,
				    sizeof(qstr));
				struct voistatdata_tdgstctd64 *parent;
				parent = RB_PARENT(rbctd64, rblnk);
				int rb_color =
					parent == NULL ? 0 :
					RB_LEFT(parent, rblnk) == rbctd64 ?
					(_RB_BITSUP(parent, rblnk) & _RB_L) != 0 :
 					(_RB_BITSUP(parent, rblnk) & _RB_R) != 0;
				printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
				    "mu=%s\n",
				    (int)ARB_SELFIDX(ctd64tree, rbctd64),
				    (int)ARB_SELFIDX(ctd64tree,
				      RB_PARENT(rbctd64, rblnk)),
				    (int)ARB_SELFIDX(ctd64tree,
				      RB_LEFT(rbctd64, rblnk)),
				    (int)ARB_SELFIDX(ctd64tree,
				      RB_RIGHT(rbctd64, rblnk)),
				    rb_color,
				    qstr);

				panic("RB@%p and ARB@%p trees differ\n",
				    rbctdtree, ctd64tree);
			}
			i++;
		}
#endif /* DIAGNOSTIC */
	}

	return (error);
}
3436 
/*
 * Update a t-digest stat with the VOI sample value 'voival'.  The sample is
 * first converted to a Q-number 'x' carrying the same fractional precision
 * as the digest's centroids, then inserted into the digest with a weight of
 * 1.  On success the stat is marked VS_VSDVALID.  Returns 0 or an
 * errno-style error from the conversion or insertion.
 */
static inline int
stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
    struct voistat *vs, struct voistatdata_tdgst *tdgst)
{
	s64q_t x;
	int error;

	error = 0;

	switch (vs->dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		/* Use same precision as the user's centroids. */
		/* NB: reads centroid slot 0 for its precision — assumes the
		 * slot is initialised even for an empty digest. */
		Q_INI(&x, 0, 0, Q_NFBITS(
		    ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		/* Use same precision as the user's centroids. */
		Q_INI(&x, 0, 0, Q_NFBITS(
		    ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
		break;
	default:
		KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
		    vs->dtype == VSD_DTYPE_TDGSTCLUST64,
		    ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
		    __func__, vs->dtype));
		return (EINVAL);
	}

	/*
	 * Copy the sample into 'x', converting from the VOI's native data
	 * type.  Conversion can fail (e.g. value does not fit) in which case
	 * the error is returned below.
	 *
	 * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
	 * returning EOVERFLOW if the voival would have fit in a u64q_t.
	 */
	switch (voi_dtype) {
	case VSD_DTYPE_INT_S32:
		error = Q_QCPYVALI(&x, voival->int32.s32);
		break;
	case VSD_DTYPE_INT_U32:
		error = Q_QCPYVALI(&x, voival->int32.u32);
		break;
	case VSD_DTYPE_INT_S64:
		error = Q_QCPYVALI(&x, voival->int64.s64);
		break;
	case VSD_DTYPE_INT_U64:
		error = Q_QCPYVALI(&x, voival->int64.u64);
		break;
	case VSD_DTYPE_INT_SLONG:
		error = Q_QCPYVALI(&x, voival->intlong.slong);
		break;
	case VSD_DTYPE_INT_ULONG:
		error = Q_QCPYVALI(&x, voival->intlong.ulong);
		break;
	case VSD_DTYPE_Q_S32:
		error = Q_QCPYVALQ(&x, voival->q32.sq32);
		break;
	case VSD_DTYPE_Q_U32:
		error = Q_QCPYVALQ(&x, voival->q32.uq32);
		break;
	case VSD_DTYPE_Q_S64:
		error = Q_QCPYVALQ(&x, voival->q64.sq64);
		break;
	case VSD_DTYPE_Q_U64:
		error = Q_QCPYVALQ(&x, voival->q64.uq64);
		break;
	default:
		error = EINVAL;
		break;
	}

	/* Insert with weight 1, first attempt. */
	if (error ||
	    (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
		return (error);

	vs->flags |= VS_VSDVALID;
	return (0);
}
3512 
3513 int
3514 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3515     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3516 {
3517 	struct voi *v;
3518 	struct voistat *vs;
3519 	void *statevsd, *vsd;
3520 	int error, i, tmperr;
3521 
3522 	error = 0;
3523 
3524 	if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
3525 	    voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3526 		return (EINVAL);
3527 	v = &sb->vois[voi_id];
3528 	if (voi_dtype != v->dtype || v->id < 0 ||
3529 	    ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3530 		return (EINVAL);
3531 
3532 	vs = BLOB_OFFSET(sb, v->stats_off);
3533 	if (v->flags & VOI_REQSTATE)
3534 		statevsd = BLOB_OFFSET(sb, vs->data_off);
3535 	else
3536 		statevsd = NULL;
3537 
3538 	if (flags & SB_VOI_RELUPDATE) {
3539 		switch (voi_dtype) {
3540 		case VSD_DTYPE_INT_S32:
3541 			voival->int32.s32 +=
3542 			    VSD(voistate, statevsd)->prev.int32.s32;
3543 			break;
3544 		case VSD_DTYPE_INT_U32:
3545 			voival->int32.u32 +=
3546 			    VSD(voistate, statevsd)->prev.int32.u32;
3547 			break;
3548 		case VSD_DTYPE_INT_S64:
3549 			voival->int64.s64 +=
3550 			    VSD(voistate, statevsd)->prev.int64.s64;
3551 			break;
3552 		case VSD_DTYPE_INT_U64:
3553 			voival->int64.u64 +=
3554 			    VSD(voistate, statevsd)->prev.int64.u64;
3555 			break;
3556 		case VSD_DTYPE_INT_SLONG:
3557 			voival->intlong.slong +=
3558 			    VSD(voistate, statevsd)->prev.intlong.slong;
3559 			break;
3560 		case VSD_DTYPE_INT_ULONG:
3561 			voival->intlong.ulong +=
3562 			    VSD(voistate, statevsd)->prev.intlong.ulong;
3563 			break;
3564 		case VSD_DTYPE_Q_S32:
3565 			error = Q_QADDQ(&voival->q32.sq32,
3566 			    VSD(voistate, statevsd)->prev.q32.sq32);
3567 			break;
3568 		case VSD_DTYPE_Q_U32:
3569 			error = Q_QADDQ(&voival->q32.uq32,
3570 			    VSD(voistate, statevsd)->prev.q32.uq32);
3571 			break;
3572 		case VSD_DTYPE_Q_S64:
3573 			error = Q_QADDQ(&voival->q64.sq64,
3574 			    VSD(voistate, statevsd)->prev.q64.sq64);
3575 			break;
3576 		case VSD_DTYPE_Q_U64:
3577 			error = Q_QADDQ(&voival->q64.uq64,
3578 			    VSD(voistate, statevsd)->prev.q64.uq64);
3579 			break;
3580 		default:
3581 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3582 			break;
3583 		}
3584 	}
3585 
3586 	if (error)
3587 		return (error);
3588 
3589 	for (i = v->voistatmaxid; i > 0; i--) {
3590 		vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3591 		if (vs->stype < 0)
3592 			continue;
3593 
3594 		vsd = BLOB_OFFSET(sb, vs->data_off);
3595 
3596 		switch (vs->stype) {
3597 		case VS_STYPE_MAX:
3598 			tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3599 			    vs, vsd);
3600 			break;
3601 		case VS_STYPE_MIN:
3602 			tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3603 			    vs, vsd);
3604 			break;
3605 		case VS_STYPE_SUM:
3606 			tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3607 			    vs, vsd);
3608 			break;
3609 		case VS_STYPE_HIST:
3610 			tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3611 			    vs, vsd);
3612 			break;
3613 		case VS_STYPE_TDGST:
3614 			tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3615 			    vs, vsd);
3616 			break;
3617 		default:
3618 			KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
3619 			break;
3620 		}
3621 
3622 		if (tmperr) {
3623 			error = tmperr;
3624 			VS_INCERRS(vs);
3625 		}
3626 	}
3627 
3628 	if (statevsd) {
3629 		switch (voi_dtype) {
3630 		case VSD_DTYPE_INT_S32:
3631 			VSD(voistate, statevsd)->prev.int32.s32 =
3632 			    voival->int32.s32;
3633 			break;
3634 		case VSD_DTYPE_INT_U32:
3635 			VSD(voistate, statevsd)->prev.int32.u32 =
3636 			    voival->int32.u32;
3637 			break;
3638 		case VSD_DTYPE_INT_S64:
3639 			VSD(voistate, statevsd)->prev.int64.s64 =
3640 			    voival->int64.s64;
3641 			break;
3642 		case VSD_DTYPE_INT_U64:
3643 			VSD(voistate, statevsd)->prev.int64.u64 =
3644 			    voival->int64.u64;
3645 			break;
3646 		case VSD_DTYPE_INT_SLONG:
3647 			VSD(voistate, statevsd)->prev.intlong.slong =
3648 			    voival->intlong.slong;
3649 			break;
3650 		case VSD_DTYPE_INT_ULONG:
3651 			VSD(voistate, statevsd)->prev.intlong.ulong =
3652 			    voival->intlong.ulong;
3653 			break;
3654 		case VSD_DTYPE_Q_S32:
3655 			error = Q_QCPYVALQ(
3656 			    &VSD(voistate, statevsd)->prev.q32.sq32,
3657 			    voival->q32.sq32);
3658 			break;
3659 		case VSD_DTYPE_Q_U32:
3660 			error = Q_QCPYVALQ(
3661 			    &VSD(voistate, statevsd)->prev.q32.uq32,
3662 			    voival->q32.uq32);
3663 			break;
3664 		case VSD_DTYPE_Q_S64:
3665 			error = Q_QCPYVALQ(
3666 			    &VSD(voistate, statevsd)->prev.q64.sq64,
3667 			    voival->q64.sq64);
3668 			break;
3669 		case VSD_DTYPE_Q_U64:
3670 			error = Q_QCPYVALQ(
3671 			    &VSD(voistate, statevsd)->prev.q64.uq64,
3672 			    voival->q64.uq64);
3673 			break;
3674 		default:
3675 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3676 			break;
3677 		}
3678 	}
3679 
3680 	return (error);
3681 }
3682 
3683 #ifdef _KERNEL
3684 
/*
 * Kernel boot-time hook for the stats(9) framework.  Currently an empty
 * placeholder; registered early (SI_SUB_KDTRACE) so future framework
 * initialisation runs before potential consumers.
 */
static void
stats_init(void *arg)
{

}
SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3691 
3692 /*
3693  * Sysctl handler to display the list of available stats templates.
3694  */
3695 static int
3696 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3697 {
3698 	struct sbuf *s;
3699 	int err, i;
3700 
3701 	err = 0;
3702 
3703 	/* We can tolerate ntpl being stale, so do not take the lock. */
3704 	s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3705 	    ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3706 	if (s == NULL)
3707 		return (ENOMEM);
3708 
3709 	TPL_LIST_RLOCK();
3710 	for (i = 0; i < ntpl; i++) {
3711 		err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3712 		    tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3713 		if (err) {
3714 			/* Sbuf overflow condition. */
3715 			err = EOVERFLOW;
3716 			break;
3717 		}
3718 	}
3719 	TPL_LIST_RUNLOCK();
3720 
3721 	if (!err) {
3722 		sbuf_finish(s);
3723 		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3724 	}
3725 
3726 	sbuf_delete(s);
3727 	return (err);
3728 }
3729 
3730 /*
3731  * Called by subsystem-specific sysctls to report and/or parse the list of
3732  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3733  * conformant function pointer must be passed in as arg1, which is used to
3734  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3735  * a zero-initialised allocation of arg2-sized contextual memory is
3736  * heap-allocated and passed in to all subsystem callbacks made during the
3737  * operation of stats_tpl_sample_rates().
3738  *
3739  * XXXLAS: Assumes templates are never removed, which is currently true but may
3740  * need to be reworked in future if dynamic template management becomes a
3741  * requirement e.g. to support kernel module based templates.
3742  */
int
stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
{
	char kvpair_fmt[16], tplspec_fmt[16];
	char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
	char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
	stats_tpl_sr_cb_t subsys_cb;
	void *subsys_ctx;
	char *buf, *new_rates_usr_str, *tpl_name_p;
	struct stats_tpl_sample_rate *rates;
	struct sbuf *s, _s;
	uint32_t cum_pct, pct, tpl_hash;
	int err, i, off, len, newlen, nrates;

	buf = NULL;
	rates = NULL;
	err = nrates = 0;
	/* arg1 carries the subsystem's rates-list accessor callback. */
	subsys_cb = (stats_tpl_sr_cb_t)arg1;
	KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
	if (arg2 > 0)
		subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
	else
		subsys_ctx = NULL;

	/* Grab current count of subsystem rates. */
	err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
	if (err)
		goto done;

	/* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
	len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));

	/* 'buf' doubles as the render buffer (read) and copyin buffer (write). */
	if (req->oldptr != NULL || req->newptr != NULL)
		buf = malloc(len, M_TEMP, M_WAITOK);

	if (req->oldptr != NULL) {
		if (nrates == 0) {
			/* No rates, so return an empty string via oldptr. */
			err = SYSCTL_OUT(req, "", 1);
			if (err)
				goto done;
			goto process_new;
		}

		s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);

		/* Grab locked count of, and ptr to, subsystem rates. */
		err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
		    subsys_ctx);
		if (err)
			goto done;
		TPL_LIST_RLOCK();
		for (i = 0; i < nrates && !err; i++) {
			err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
			    tpllist[rates[i].tpl_slot_id]->mb->tplname,
			    tpllist[rates[i].tpl_slot_id]->mb->tplhash,
			    rates[i].tpl_sample_pct);
		}
		TPL_LIST_RUNLOCK();
		/* Tell subsystem that we're done with its rates list. */
		err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
		if (err)
			goto done;

		err = sbuf_finish(s);
		if (err)
			goto done; /* We lost a race for buf to be too small. */

		/* Return the rendered string data via oldptr. */
		err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
	} else {
		/* Return the upper bound size for buffer sizing requests. */
		err = SYSCTL_OUT(req, NULL, len);
	}

process_new:
	if (err || req->newptr == NULL)
		goto done;

	newlen = req->newlen - req->newidx;
	err = SYSCTL_IN(req, buf, newlen);
	if (err)
		goto done;

	/*
	 * Initialise format strings at run time.
	 *
	 * Write the max template spec string length into the
	 * template_spec=percent key-value pair parsing format string as:
	 *     " %<width>[^=]=%u %n"
	 *
	 * Write the max template name string length into the tplname:tplhash
	 * parsing format string as:
	 *     "%<width>[^:]:%u"
	 *
	 * Subtract 1 for \0 appended by sscanf().
	 */
	sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
	sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);

	/*
	 * Parse each CSV key-value pair specifying a template and its sample
	 * percentage. Whitespace either side of a key-value pair is ignored.
	 * Templates can be specified by name, hash, or name and hash per the
	 * following formats (chars in [] are optional):
	 *    ["]<tplname>["]=<percent>
	 *    :hash=pct
	 *    ["]<tplname>["]:hash=<percent>
	 */
	cum_pct = nrates = 0;
	rates = NULL;
	buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
	new_rates_usr_str = buf;
	while (isspace(*new_rates_usr_str))
		new_rates_usr_str++; /* Skip leading whitespace. */
	while (*new_rates_usr_str != '\0') {
		tpl_name_p = tpl_name;
		tpl_name[0] = '\0';
		tpl_hash = 0;
		off = 0;

		/*
		 * Parse key-value pair which must perform 2 conversions, then
		 * parse the template spec to extract either name, hash, or name
		 * and hash depending on the three possible spec formats. The
		 * tplspec_fmt format specifier parses name or name and hash
		 * template specs, while the ":%u" format specifier parses
		 * hash-only template specs. If parsing is successful, ensure
		 * the cumulative sampling percentage does not exceed 100.
		 */
		err = EINVAL;
		if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
		    &off))
			break;
		if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
		    (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
			break;
		if ((cum_pct += pct) > 100)
			break;
		err = 0;

		/* Strip surrounding "" from template name if present. */
		len = strlen(tpl_name);
		if (len > 0) {
			if (tpl_name[len - 1] == '"')
				tpl_name[--len] = '\0';
			if (tpl_name[0] == '"') {
				tpl_name_p++;
				len--;
			}
		}

		/* Grow the rates array by one entry and resolve the spec to
		 * a template slot id (negative return encodes an errno). */
		rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
		    (nrates + 1) * sizeof(*rates), M_WAITOK);
		rates[nrates].tpl_slot_id =
		    stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
		if (rates[nrates].tpl_slot_id < 0) {
			err = -rates[nrates].tpl_slot_id;
			break;
		}
		rates[nrates].tpl_sample_pct = pct;
		nrates++;
		new_rates_usr_str += off;
		if (*new_rates_usr_str != ',')
			break; /* End-of-input or malformed. */
		new_rates_usr_str++; /* Move past comma to next pair. */
	}

	if (!err) {
		if ((new_rates_usr_str - buf) < newlen) {
			/* Entire input has not been consumed. */
			err = EINVAL;
		} else {
			/*
			 * Give subsystem the new rates. They'll return the
			 * appropriate rates pointer for us to garbage collect.
			 */
			err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
			    subsys_ctx);
		}
	}
	stats_free(rates);

done:
	free(buf, M_TEMP);
	free(subsys_ctx, M_TEMP);
	return (err);
}
3931 
3932 SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
3933     "stats(9) MIB");
3934 
3935 SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
3936     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
3937     stats_tpl_list_available, "A",
3938     "list the name/hash of all available stats(9) templates");
3939 
3940 #else /* ! _KERNEL */
3941 
/* Userland build: initialise the template list rwlock before main() runs. */
static void __attribute__ ((constructor))
stats_constructor(void)
{

	pthread_rwlock_init(&tpllistlock, NULL);
}
3948 
/* Userland build: tear down the template list rwlock at process exit. */
static void __attribute__ ((destructor))
stats_destructor(void)
{

	pthread_rwlock_destroy(&tpllistlock);
}
3955 
3956 #endif /* _KERNEL */
3957