xref: /freebsd/sys/kern/subr_stats.c (revision 19261079b74319502c6ffa1249920079f0f69a72)
1 /*-
2  * Copyright (c) 2014-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 /*
30  * Author: Lawrence Stewart <lstewart@netflix.com>
31  */
32 
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <sys/param.h>
37 #include <sys/arb.h>
38 #include <sys/ctype.h>
39 #include <sys/errno.h>
40 #include <sys/hash.h>
41 #include <sys/limits.h>
42 #include <sys/malloc.h>
43 #include <sys/qmath.h>
44 #include <sys/sbuf.h>
45 #if defined(DIAGNOSTIC)
46 #include <sys/tree.h>
47 #endif
48 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
49 #include <sys/stddef.h>
50 #include <sys/stdint.h>
51 #include <sys/time.h>
52 
53 #ifdef _KERNEL
54 #include <sys/kernel.h>
55 #include <sys/lock.h>
56 #include <sys/rwlock.h>
57 #include <sys/sysctl.h>
58 #include <sys/systm.h>
59 #else /* ! _KERNEL */
60 #include <pthread.h>
61 #include <stdbool.h>
62 #include <stdio.h>
63 #include <stdlib.h>
64 #include <string.h>
65 #endif /* _KERNEL */
66 
/*
 * Data stored for the VSD_DTYPE_VOISTATE data type (its size is what
 * vsd_dtype2size[VSD_DTYPE_VOISTATE] reports). It holds no statistic of its
 * own, only the VOI value that was seen previously.
 */
struct voistatdata_voistate {
	/* Previous VOI value for diff calculation. */
	struct voistatdata_numeric prev;
};
71 
#define	VS_VSDVALID	0x0001	/* Stat's voistatdata updated at least once. */
/*
 * Descriptor for a single stat. The stat's data is not embedded here; it is
 * located within the blob via data_off and dsz. The last two members are bit
 * fields that together occupy 16 bits: VS_EBITS bits of error counter and the
 * remainder for flags (presumably VS_VSDVALID and friends -- confirm against
 * the users of this field).
 */
struct voistat {
	int8_t		stype;		/* Type of stat e.g. VS_STYPE_SUM. */
	enum vsd_dtype	dtype : 8;	/* Data type of this stat's data. */
	uint16_t	data_off;	/* Blob offset for this stat's data. */
	uint16_t	dsz;		/* Size of stat's data. */
#define	VS_EBITS 8
	uint16_t	errs : VS_EBITS;/* Non-wrapping error count. */
	uint16_t	flags : 16 - VS_EBITS;
};
/*
 * The voistat error count is capped to avoid wrapping: once errs reaches the
 * largest value representable in VS_EBITS bits, further increments are
 * ignored.
 */
#define	VS_INCERRS(vs) do {						\
	if ((vs)->errs < (1U << VS_EBITS) - 1)				\
		(vs)->errs++;						\
} while (0)
87 
/*
 * Ideas for flags:
 *   - Global or entity specific (global would imply use of counter(9)?)
 *   - Whether to reset stats on read or not
 *   - Signal an overflow?
 *   - Compressed voistat array
 */
#define	VOI_REQSTATE	0x0001	/* VOI requires VS_STYPE_VOISTATE. */
/*
 * Descriptor for one VOI held in a blob's vois[] array. The layout is fixed at
 * 8 bytes (enforced by a _Static_assert next to the NVOIS() macro), which is
 * what allows NVOIS() to use a shift rather than a division.
 */
struct voi {
	int16_t		id;		/* VOI id. */
	enum vsd_dtype	dtype : 8;	/* Data type of the VOI itself. */
	int8_t		voistatmaxid;	/* Largest allocated voistat index. */
	uint16_t	stats_off;	/* Blob offset for this VOIs stats. */
	uint16_t	flags;		/* VOI_* flags, e.g. VOI_REQSTATE. */
};
103 
/*
 * Memory for the entire blob is allocated as a slab and then offsets are
 * maintained to carve up the slab into sections holding different data types.
 *
 * Ideas for flags:
 * - Compressed voi array (trade off memory usage vs search time)
 * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/MiB)
 */
struct statsblobv1 {
	/*
	 * Consumer visible header. The _Static_assert below requires that
	 * these members end exactly where struct statsblob's opaque member
	 * begins.
	 */
	uint8_t		abi;
	uint8_t		endian;
	uint16_t	flags;
	uint16_t	maxsz;
	uint16_t	cursz;		/* Used as the blob's byte length when hashing. */
	/* Fields from here down are opaque to consumers. */
	uint32_t	tplhash;	/* Base template hash ID. */
	uint16_t	stats_off;	/* voistat array blob offset. */
	uint16_t	statsdata_off;	/* voistatdata array blob offset. */
	sbintime_t	created;	/* Blob creation time. */
	sbintime_t	lastrst;	/* Time of last reset. */
	struct voi	vois[];		/* Array indexed by [voi_id]. */
} __aligned(sizeof(void *));
/*
 * The v1 layout must line up with the generic struct statsblob: everything up
 * to and including cursz is shared, and the remainder maps onto "opaque".
 */
_Static_assert(offsetof(struct statsblobv1, cursz) +
    SIZEOF_MEMBER(struct statsblobv1, cursz) ==
    offsetof(struct statsblob, opaque),
    "statsblobv1 ABI mismatch");
130 
/*
 * v1 typed view of a template: a metadata blob paired with a v1 stats blob.
 * NOTE(review): presumably mirrors the generic struct statsblob_tpl member for
 * member -- confirm against sys/stats.h.
 */
struct statsblobv1_tpl {
	struct metablob		*mb;	/* Template metadata. */
	struct statsblobv1	*sb;	/* Template stats blob. */
};
135 
/*
 * Context passed to iterator callbacks (see stats_v1_blob_itercb_t). It
 * carries the caller's opaque pointer plus the iteration flags and the slot
 * indices recorded for the current callback invocation.
 */
struct sb_iter_ctx {
	void		*usrctx;	/* Caller supplied context. */
	uint32_t	flags;		/* Flags for current iteration. */
	int16_t		vslot;		/* struct voi slot index. */
	int8_t		vsslot;		/* struct voistat slot index. */
};
143 
/*
 * Context for a blob-to-string callback.
 * NOTE(review): the consumer of this struct is outside this chunk; the member
 * descriptions below are inferred from their types and names -- confirm.
 */
struct sb_tostrcb_ctx {
	struct sbuf		*buf;	/* Presumably the output buffer. */
	struct statsblob_tpl	*tpl;	/* Presumably the blob's template. */
	enum sb_str_fmt	fmt;		/* Presumably the output format. */
	uint32_t		flags;
};
150 
/*
 * Pairs a stats_blob_visitcb_t callback with the opaque context pointer that
 * accompanies it.
 * NOTE(review): how the pair is dispatched is not visible in this chunk.
 */
struct sb_visitcb_ctx {
	stats_blob_visitcb_t	cb;
	void			*usrctx;
};
155 
/*
 * Stats blob iterator callback. It is handed the blob, a voi and a voistat
 * descriptor, and the iteration context. The meaning of the int return value
 * is defined by stats_v1_blob_iter(), which is not visible in this chunk.
 */
typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx);
159 
/*
 * Portability layer so the rest of the file can be built for both the kernel
 * and userland: template list locking, lock assertions, memory release and
 * (userland only) stand-ins for KASSERT() and panic().
 */
#ifdef _KERNEL
/* Kernel: the template list is guarded by an rwlock set up via RW_SYSINIT. */
static struct rwlock tpllistlock;
RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
#define	TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
#define	TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
#define	TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
#define	TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
#define	TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
#define	TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
#define	TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
#define	stats_free(ptr) free((ptr), M_STATS)
#else /* ! _KERNEL */
/*
 * Userland: a pthread rwlock stands in for the kernel lock.
 * NOTE(review): stats_constructor()/stats_destructor() are defined outside
 * this chunk; presumably they initialise and destroy tpllistlock -- confirm.
 */
static void stats_constructor(void);
static void stats_destructor(void);
static pthread_rwlock_t tpllistlock;
#define	TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
#define	TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
#define	TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
#define	TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
#define	TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
/* Lock state is not checked in userland; the assertions expand to nothing. */
#define	TPL_LIST_LOCK_ASSERT() do { } while (0)
#define	TPL_LIST_RLOCK_ASSERT() do { } while (0)
#define	TPL_LIST_WLOCK_ASSERT() do { } while (0)
/*
 * Userland KASSERT(): "msg" is a parenthesised printf style argument list, so
 * "panic msg" expands into a call of the panic() macro defined below. With
 * NDEBUG defined, assertions vanish and stats_abort() does nothing, which
 * means panic() then only prints its message.
 */
#ifdef NDEBUG
#define	KASSERT(cond, msg) do {} while (0)
#define	stats_abort() do {} while (0)
#else /* ! NDEBUG */
#define	KASSERT(cond, msg) do { \
	if (!(cond)) { \
		panic msg; \
	} \
} while (0)
#define	stats_abort() abort()
#endif /* NDEBUG */
#define	stats_free(ptr) free(ptr)
/* Userland panic(): print the message to stderr, then stats_abort(). */
#define	panic(fmt, ...) do { \
	fprintf(stderr, (fmt), ##__VA_ARGS__); \
	stats_abort(); \
} while (0)
#endif /* _KERNEL */
201 
/*
 * Size limit for a v1 blob; it equals the largest value the 16 bit
 * maxsz/cursz header fields of struct statsblobv1 can hold.
 */
#define	SB_V1_MAXSZ 65535

/* Obtain a blob offset pointer, i.e. the address "off" bytes past "sb". */
#define	BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))

/*
 * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
 * power of 2 size, we can shift instead of divide. The shift amount must be
 * updated if sizeof(struct voi) ever changes, which the assert should catch.
 *
 * The count is derived from the gap between the end of the fixed
 * struct statsblobv1 header and stats_off, where the voistat array begins.
 */
#define	NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
    sizeof(struct statsblobv1)) >> 3))
_Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
215 
/*
 * Short name for each stat type, indexed by VS_STYPE_*.
 * Try to restrict names to alphanumeric characters and underscores to
 * simplify JSON compatibility.
 */
const char *vs_stype2name[VS_NUM_STYPES] = {
	[VS_STYPE_VOISTATE] = "VOISTATE",
	[VS_STYPE_SUM] = "SUM",
	[VS_STYPE_MAX] = "MAX",
	[VS_STYPE_MIN] = "MIN",
	[VS_STYPE_HIST] = "HIST",
	[VS_STYPE_TDGST] = "TDGST",
};
225 
/* Human readable description of each stat type, indexed by VS_STYPE_*. */
const char *vs_stype2desc[VS_NUM_STYPES] = {
	[VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
	[VS_STYPE_SUM] = "Simple arithmetic accumulator",
	[VS_STYPE_MAX] = "Maximum observed VOI value",
	[VS_STYPE_MIN] = "Minimum observed VOI value",
	[VS_STYPE_HIST] = "Histogram of observed VOI values",
	[VS_STYPE_TDGST] = "t-digest of observed VOI values",
};
234 
/*
 * Short name for each data type, indexed by VSD_DTYPE_*. Each string is the
 * enumerator's name with the "VSD_DTYPE_" prefix removed.
 */
const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = "VOISTATE",
	[VSD_DTYPE_INT_S32] = "INT_S32",
	[VSD_DTYPE_INT_U32] = "INT_U32",
	[VSD_DTYPE_INT_S64] = "INT_S64",
	[VSD_DTYPE_INT_U64] = "INT_U64",
	[VSD_DTYPE_INT_SLONG] = "INT_SLONG",
	[VSD_DTYPE_INT_ULONG] = "INT_ULONG",
	[VSD_DTYPE_Q_S32] = "Q_S32",
	[VSD_DTYPE_Q_U32] = "Q_U32",
	[VSD_DTYPE_Q_S64] = "Q_S64",
	[VSD_DTYPE_Q_U64] = "Q_U64",
	[VSD_DTYPE_CRHIST32] = "CRHIST32",
	[VSD_DTYPE_DRHIST32] = "DRHIST32",
	[VSD_DTYPE_DVHIST32] = "DVHIST32",
	[VSD_DTYPE_CRHIST64] = "CRHIST64",
	[VSD_DTYPE_DRHIST64] = "DRHIST64",
	[VSD_DTYPE_DVHIST64] = "DVHIST64",
	[VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
	[VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
};
256 
/*
 * sizeof() of the structure backing each data type, indexed by VSD_DTYPE_*.
 * Signed and unsigned variants of the same width share one structure and
 * therefore one size. For the histogram and t-digest types this is the size
 * of the base structure only; the helpers in this file size the actual
 * allocation with HIST_NBKTS2VSDSZ()/TDGST_NCTRS2VSDSZ() instead.
 */
const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
	[VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
	[VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
	[VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
	[VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
	[VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
	[VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
	[VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
	[VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
	[VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
	[VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
	[VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
	[VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
	[VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
	[VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
	[VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
	[VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
	[VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
	[VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
};
278 
/*
 * Per data type "compound" marker, indexed by VSD_DTYPE_*: true for VOISTATE,
 * the histogram types and the t-digest cluster types; false for the plain
 * integer and Q number types.
 * NOTE(review): the consumers of this table are outside this chunk, so what
 * they do differently for compound types should be confirmed there.
 */
static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
	[VSD_DTYPE_VOISTATE] = true,
	[VSD_DTYPE_INT_S32] = false,
	[VSD_DTYPE_INT_U32] = false,
	[VSD_DTYPE_INT_S64] = false,
	[VSD_DTYPE_INT_U64] = false,
	[VSD_DTYPE_INT_SLONG] = false,
	[VSD_DTYPE_INT_ULONG] = false,
	[VSD_DTYPE_Q_S32] = false,
	[VSD_DTYPE_Q_U32] = false,
	[VSD_DTYPE_Q_S64] = false,
	[VSD_DTYPE_Q_U64] = false,
	[VSD_DTYPE_CRHIST32] = true,
	[VSD_DTYPE_DRHIST32] = true,
	[VSD_DTYPE_DVHIST32] = true,
	[VSD_DTYPE_CRHIST64] = true,
	[VSD_DTYPE_DRHIST64] = true,
	[VSD_DTYPE_DVHIST64] = true,
	[VSD_DTYPE_TDGSTCLUST32] = true,
	[VSD_DTYPE_TDGSTCLUST64] = true,
};
300 
/*
 * Smallest and largest value for each numeric data type, indexed as
 * numeric_limits[LIM_MIN or LIM_MAX][VSD_DTYPE_*].
 *
 * The second dimension has only VSD_DTYPE_Q_U64 + 1 entries, so indexing it
 * with a data type beyond VSD_DTYPE_Q_U64 is out of bounds.
 *
 * Within this file the table seeds MIN/MAX stats (stats_vss_numeric_hlpr())
 * and the outer edges of open-ended histogram buckets
 * (stats_vss_hist_hlpr()). For the Q types both of those users overwrite the
 * control bits of the copied value afterwards via Q_SCVAL().
 */
const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
	[LIM_MIN] = {
		[VSD_DTYPE_VOISTATE] = {0},
		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
	},
	[LIM_MAX] = {
		[VSD_DTYPE_VOISTATE] = {0},
		[VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
		[VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
		[VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
		[VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
		[VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
		[VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
		[VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
		[VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
		[VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
		[VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
	}
};
329 
/*
 * Registered templates. tpllistlock protects tpllist and ntpl; the lookup
 * functions in this file index tpllist[] with ids in the range [0, ntpl).
 */
static uint32_t ntpl;
static struct statsblob_tpl **tpllist;

/* Forward declarations for helpers that are used before their definition. */
static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
    int flags);
//static void stats_v1_blob_finalise(struct statsblobv1 *sb);
static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
    uint32_t flags);
static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes);
static void stats_v1_blob_iter(struct statsblobv1 *sb,
    stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
345 
346 static inline int
347 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
348 {
349 
350 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
351 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
352 	    Q_RELPREC(c1->mu, c2->mu)));
353 
354        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
355 }
356 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
357 
358 static inline int
359 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
360 {
361 
362 	KASSERT(Q_PRECEQ(c1->mu, c2->mu),
363 	    ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
364 	    Q_RELPREC(c1->mu, c2->mu)));
365 
366        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
367 }
368 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
369 
/*
 * DIAGNOSTIC builds additionally generate red-black tree code (sys/tree.h)
 * over the same centroid structures, linked through their rblnk member and
 * ordered by the same comparison functions as the ARB trees.
 */
#ifdef DIAGNOSTIC
RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
#endif
374 
375 static inline sbintime_t
376 stats_sbinuptime(void)
377 {
378 	sbintime_t sbt;
379 #ifdef _KERNEL
380 
381 	sbt = sbinuptime();
382 #else /* ! _KERNEL */
383 	struct timespec tp;
384 
385 	clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
386 	sbt = tstosbt(tp);
387 #endif /* _KERNEL */
388 
389 	return (sbt);
390 }
391 
392 static inline void *
393 stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
394 {
395 
396 #ifdef _KERNEL
397 	/* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
398 	if (!(flags & (M_WAITOK | M_NOWAIT)))
399 		flags |= M_NOWAIT;
400 	ptr = realloc(ptr, newsz, M_STATS, flags);
401 #else /* ! _KERNEL */
402 	ptr = realloc(ptr, newsz);
403 	if ((flags & M_ZERO) && ptr != NULL) {
404 		if (oldsz == 0)
405 			memset(ptr, '\0', newsz);
406 		else if (newsz > oldsz)
407 			memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
408 	}
409 #endif /* _KERNEL */
410 
411 	return (ptr);
412 }
413 
/*
 * Duplicate the NUL terminated string s.
 *
 * Kernel: the copy is allocated from M_STATS using flags, with M_NOWAIT added
 * when the caller specified neither M_WAITOK nor M_NOWAIT. Userland: flags is
 * ignored and the work is handed to strdup(3).
 *
 * Returns the copy, or NULL if the allocation failed. The result is released
 * with stats_free().
 */
static inline char *
stats_strdup(const char *s, int flags)
{
#ifdef _KERNEL
	char *dup;
	size_t nbytes;

	if ((flags & (M_WAITOK | M_NOWAIT)) == 0)
		flags |= M_NOWAIT;

	/* Include the terminating NUL in the copy. */
	nbytes = strlen(s) + 1;
	dup = malloc(nbytes, M_STATS, flags);
	if (dup != NULL)
		memcpy(dup, s, nbytes);

	return (dup);
#else
	(void)flags;

	return (strdup(s));
#endif
}
436 
437 static inline void
438 stats_tpl_update_hash(struct statsblob_tpl *tpl)
439 {
440 
441 	TPL_LIST_WLOCK_ASSERT();
442 	tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
443 	for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
444 		if (tpl->mb->voi_meta[voi_id].name != NULL)
445 			tpl->mb->tplhash = hash32_str(
446 			    tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
447 	}
448 	tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
449 	    tpl->mb->tplhash);
450 }
451 
/*
 * Raise base to the power exp using binary exponentiation. Arithmetic is
 * plain uint64_t, so results that do not fit wrap modulo 2^64. Any base
 * raised to the power 0 yields 1.
 */
static inline uint64_t
stats_pow_u64(uint64_t base, uint64_t exp)
{
	uint64_t acc;

	/* Walk exp from its least significant bit, squaring base as we go. */
	for (acc = 1; exp != 0; exp >>= 1, base *= base) {
		if ((exp & 1) != 0)
			acc *= base;
	}

	return (acc);
}
466 
467 static inline int
468 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
469     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
470 {
471 	uint64_t step = 0;
472 	int error = 0;
473 
474 	switch (info->scheme) {
475 	case BKT_LIN:
476 		step = info->lin.stepinc;
477 		break;
478 	case BKT_EXP:
479 		step = stats_pow_u64(info->exp.stepbase,
480 		    info->exp.stepexp + curbkt);
481 		break;
482 	case BKT_LINEXP:
483 		{
484 		uint64_t curstepexp = 1;
485 
486 		switch (info->voi_dtype) {
487 		case VSD_DTYPE_INT_S32:
488 			while ((int32_t)stats_pow_u64(info->linexp.stepbase,
489 			    curstepexp) <= bkt_lb->int32.s32)
490 				curstepexp++;
491 			break;
492 		case VSD_DTYPE_INT_U32:
493 			while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
494 			    curstepexp) <= bkt_lb->int32.u32)
495 				curstepexp++;
496 			break;
497 		case VSD_DTYPE_INT_S64:
498 			while ((int64_t)stats_pow_u64(info->linexp.stepbase,
499 			    curstepexp) <= bkt_lb->int64.s64)
500 				curstepexp++;
501 			break;
502 		case VSD_DTYPE_INT_U64:
503 			while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
504 			    curstepexp) <= bkt_lb->int64.u64)
505 				curstepexp++;
506 			break;
507 		case VSD_DTYPE_INT_SLONG:
508 			while ((long)stats_pow_u64(info->linexp.stepbase,
509 			    curstepexp) <= bkt_lb->intlong.slong)
510 				curstepexp++;
511 			break;
512 		case VSD_DTYPE_INT_ULONG:
513 			while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
514 			    curstepexp) <= bkt_lb->intlong.ulong)
515 				curstepexp++;
516 			break;
517 		case VSD_DTYPE_Q_S32:
518 			while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
519 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
520 			break;
521 		case VSD_DTYPE_Q_U32:
522 			while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
523 			    curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
524 			break;
525 		case VSD_DTYPE_Q_S64:
526 			while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
527 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
528 				curstepexp++;
529 			break;
530 		case VSD_DTYPE_Q_U64:
531 			while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
532 			    curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
533 				curstepexp++;
534 			break;
535 		default:
536 			break;
537 		}
538 
539 		step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
540 		    info->linexp.linstepdiv;
541 		if (step == 0)
542 			step = 1;
543 		break;
544 		}
545 	default:
546 		break;
547 	}
548 
549 	if (info->scheme == BKT_USR) {
550 		*bkt_lb = info->usr.bkts[curbkt].lb;
551 		*bkt_ub = info->usr.bkts[curbkt].ub;
552 	} else if (step != 0) {
553 		switch (info->voi_dtype) {
554 		case VSD_DTYPE_INT_S32:
555 			bkt_ub->int32.s32 += (int32_t)step;
556 			break;
557 		case VSD_DTYPE_INT_U32:
558 			bkt_ub->int32.u32 += (uint32_t)step;
559 			break;
560 		case VSD_DTYPE_INT_S64:
561 			bkt_ub->int64.s64 += (int64_t)step;
562 			break;
563 		case VSD_DTYPE_INT_U64:
564 			bkt_ub->int64.u64 += (uint64_t)step;
565 			break;
566 		case VSD_DTYPE_INT_SLONG:
567 			bkt_ub->intlong.slong += (long)step;
568 			break;
569 		case VSD_DTYPE_INT_ULONG:
570 			bkt_ub->intlong.ulong += (unsigned long)step;
571 			break;
572 		case VSD_DTYPE_Q_S32:
573 			error = Q_QADDI(&bkt_ub->q32.sq32, step);
574 			break;
575 		case VSD_DTYPE_Q_U32:
576 			error = Q_QADDI(&bkt_ub->q32.uq32, step);
577 			break;
578 		case VSD_DTYPE_Q_S64:
579 			error = Q_QADDI(&bkt_ub->q64.sq64, step);
580 			break;
581 		case VSD_DTYPE_Q_U64:
582 			error = Q_QADDI(&bkt_ub->q64.uq64, step);
583 			break;
584 		default:
585 			break;
586 		}
587 	} else { /* info->scheme != BKT_USR && step == 0 */
588 		return (EINVAL);
589 	}
590 
591 	return (error);
592 }
593 
594 static uint32_t
595 stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
596 {
597 	struct voistatdata_numeric bkt_lb, bkt_ub;
598 	uint32_t nbkts;
599 	int done;
600 
601 	if (info->scheme == BKT_USR) {
602 		/* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
603 		info->lb = info->usr.bkts[0].lb;
604 		info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
605 	}
606 
607 	nbkts = 0;
608 	done = 0;
609 	bkt_ub = info->lb;
610 
611 	do {
612 		bkt_lb = bkt_ub;
613 		if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
614 			return (0);
615 
616 		if (info->scheme == BKT_USR)
617 			done = (nbkts == info->usr.nbkts);
618 		else {
619 			switch (info->voi_dtype) {
620 			case VSD_DTYPE_INT_S32:
621 				done = (bkt_ub.int32.s32 > info->ub.int32.s32);
622 				break;
623 			case VSD_DTYPE_INT_U32:
624 				done = (bkt_ub.int32.u32 > info->ub.int32.u32);
625 				break;
626 			case VSD_DTYPE_INT_S64:
627 				done = (bkt_ub.int64.s64 > info->ub.int64.s64);
628 				break;
629 			case VSD_DTYPE_INT_U64:
630 				done = (bkt_ub.int64.u64 > info->ub.int64.u64);
631 				break;
632 			case VSD_DTYPE_INT_SLONG:
633 				done = (bkt_ub.intlong.slong >
634 				    info->ub.intlong.slong);
635 				break;
636 			case VSD_DTYPE_INT_ULONG:
637 				done = (bkt_ub.intlong.ulong >
638 				    info->ub.intlong.ulong);
639 				break;
640 			case VSD_DTYPE_Q_S32:
641 				done = Q_QGTQ(bkt_ub.q32.sq32,
642 				    info->ub.q32.sq32);
643 				break;
644 			case VSD_DTYPE_Q_U32:
645 				done = Q_QGTQ(bkt_ub.q32.uq32,
646 				    info->ub.q32.uq32);
647 				break;
648 			case VSD_DTYPE_Q_S64:
649 				done = Q_QGTQ(bkt_ub.q64.sq64,
650 				    info->ub.q64.sq64);
651 				break;
652 			case VSD_DTYPE_Q_U64:
653 				done = Q_QGTQ(bkt_ub.q64.uq64,
654 				    info->ub.q64.uq64);
655 				break;
656 			default:
657 				return (0);
658 			}
659 		}
660 	} while (!done);
661 
662 	if (info->flags & VSD_HIST_LBOUND_INF)
663 		nbkts++;
664 	if (info->flags & VSD_HIST_UBOUND_INF)
665 		nbkts++;
666 
667 	return (nbkts);
668 }
669 
670 int
671 stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
672     struct vss_hist_hlpr_info *info)
673 {
674 	struct voistatdata_hist *hist;
675 	struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
676 	    *ubinfbktlb, *ubinfbktub;
677 	uint32_t bkt, nbkts, nloop;
678 
679 	if (vss == NULL || info == NULL || (info->flags &
680 	(VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
681 	VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
682 		return (EINVAL);
683 
684 	info->voi_dtype = voi_dtype;
685 
686 	if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
687 		return (EINVAL);
688 
689 	switch (info->hist_dtype) {
690 	case VSD_DTYPE_CRHIST32:
691 		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
692 		break;
693 	case VSD_DTYPE_DRHIST32:
694 		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
695 		break;
696 	case VSD_DTYPE_DVHIST32:
697 		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
698 		break;
699 	case VSD_DTYPE_CRHIST64:
700 		vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
701 		break;
702 	case VSD_DTYPE_DRHIST64:
703 		vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
704 		break;
705 	case VSD_DTYPE_DVHIST64:
706 		vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
707 		break;
708 	default:
709 		return (EINVAL);
710 	}
711 
712 	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
713 	if (vss->iv == NULL)
714 		return (ENOMEM);
715 
716 	hist = (struct voistatdata_hist *)vss->iv;
717 	bkt_ub = info->lb;
718 
719 	for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
720 	    bkt < nbkts;
721 	    bkt++, nloop++) {
722 		bkt_lb = bkt_ub;
723 		if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
724 			return (EINVAL);
725 
726 		switch (info->hist_dtype) {
727 		case VSD_DTYPE_CRHIST32:
728 			VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
729 			break;
730 		case VSD_DTYPE_DRHIST32:
731 			VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
732 			VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
733 			break;
734 		case VSD_DTYPE_DVHIST32:
735 			VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
736 			break;
737 		case VSD_DTYPE_CRHIST64:
738 			VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
739 			break;
740 		case VSD_DTYPE_DRHIST64:
741 			VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
742 			VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
743 			break;
744 		case VSD_DTYPE_DVHIST64:
745 			VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
746 			break;
747 		default:
748 			return (EINVAL);
749 		}
750 	}
751 
752 	lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;
753 
754 	switch (info->hist_dtype) {
755 	case VSD_DTYPE_CRHIST32:
756 		lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
757 		ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
758 		break;
759 	case VSD_DTYPE_DRHIST32:
760 		lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
761 		lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
762 		ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
763 		ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
764 		break;
765 	case VSD_DTYPE_CRHIST64:
766 		lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
767 		ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
768 		break;
769 	case VSD_DTYPE_DRHIST64:
770 		lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
771 		lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
772 		ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
773 		ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
774 		break;
775 	case VSD_DTYPE_DVHIST32:
776 	case VSD_DTYPE_DVHIST64:
777 		break;
778 	default:
779 		return (EINVAL);
780 	}
781 
782 	if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
783 		*lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
784 		/*
785 		 * Assignment from numeric_limit array for Q types assigns max
786 		 * possible integral/fractional value for underlying data type,
787 		 * but we must set control bits for this specific histogram per
788 		 * the user's choice of fractional bits, which we extract from
789 		 * info->lb.
790 		 */
791 		if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
792 		    info->voi_dtype == VSD_DTYPE_Q_U32) {
793 			/* Signedness doesn't matter for setting control bits. */
794 			Q_SCVAL(lbinfbktlb->q32.sq32,
795 			    Q_GCVAL(info->lb.q32.sq32));
796 		} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
797 		    info->voi_dtype == VSD_DTYPE_Q_U64) {
798 			/* Signedness doesn't matter for setting control bits. */
799 			Q_SCVAL(lbinfbktlb->q64.sq64,
800 			    Q_GCVAL(info->lb.q64.sq64));
801 		}
802 		if (lbinfbktub)
803 			*lbinfbktub = info->lb;
804 	}
805 	if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
806 		*ubinfbktlb = bkt_lb;
807 		if (ubinfbktub) {
808 			*ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
809 			if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
810 			    info->voi_dtype == VSD_DTYPE_Q_U32) {
811 				Q_SCVAL(ubinfbktub->q32.sq32,
812 				    Q_GCVAL(info->lb.q32.sq32));
813 			} else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
814 			    info->voi_dtype == VSD_DTYPE_Q_U64) {
815 				Q_SCVAL(ubinfbktub->q64.sq64,
816 				    Q_GCVAL(info->lb.q64.sq64));
817 			}
818 		}
819 	}
820 
821 	return (0);
822 }
823 
824 int
825 stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
826     struct vss_tdgst_hlpr_info *info)
827 {
828 	struct voistatdata_tdgst *tdgst;
829 	struct ctdth32 *ctd32tree;
830 	struct ctdth64 *ctd64tree;
831 	struct voistatdata_tdgstctd32 *ctd32;
832 	struct voistatdata_tdgstctd64 *ctd64;
833 
834 	info->voi_dtype = voi_dtype;
835 
836 	switch (info->tdgst_dtype) {
837 	case VSD_DTYPE_TDGSTCLUST32:
838 		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
839 		break;
840 	case VSD_DTYPE_TDGSTCLUST64:
841 		vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
842 		break;
843 	default:
844 		return (EINVAL);
845 	}
846 
847 	vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
848 	if (vss->iv == NULL)
849 		return (ENOMEM);
850 
851 	tdgst = (struct voistatdata_tdgst *)vss->iv;
852 
853 	switch (info->tdgst_dtype) {
854 	case VSD_DTYPE_TDGSTCLUST32:
855 		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
856 		ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
857 			Q_INI(&ctd32->mu, 0, 0, info->prec);
858 		}
859 		break;
860 	case VSD_DTYPE_TDGSTCLUST64:
861 		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
862 		ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
863 			Q_INI(&ctd64->mu, 0, 0, info->prec);
864 		}
865 		break;
866 	default:
867 		return (EINVAL);
868 	}
869 
870 	return (0);
871 }
872 
873 int
874 stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
875     struct vss_numeric_hlpr_info *info)
876 {
877 	struct voistatdata_numeric iv;
878 
879 	switch (vss->stype) {
880 	case VS_STYPE_SUM:
881 		iv = stats_ctor_vsd_numeric(0);
882 		break;
883 	case VS_STYPE_MIN:
884 		iv = numeric_limits[LIM_MAX][voi_dtype];
885 		break;
886 	case VS_STYPE_MAX:
887 		iv = numeric_limits[LIM_MIN][voi_dtype];
888 		break;
889 	default:
890 		return (EINVAL);
891 	}
892 
893 	vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
894 	if (vss->iv == NULL)
895 		return (ENOMEM);
896 
897 	vss->vs_dtype = voi_dtype;
898 	vss->vsdsz = vsd_dtype2size[voi_dtype];
899 	switch (voi_dtype) {
900 	case VSD_DTYPE_INT_S32:
901 		*((int32_t *)vss->iv) = iv.int32.s32;
902 		break;
903 	case VSD_DTYPE_INT_U32:
904 		*((uint32_t *)vss->iv) = iv.int32.u32;
905 		break;
906 	case VSD_DTYPE_INT_S64:
907 		*((int64_t *)vss->iv) = iv.int64.s64;
908 		break;
909 	case VSD_DTYPE_INT_U64:
910 		*((uint64_t *)vss->iv) = iv.int64.u64;
911 		break;
912 	case VSD_DTYPE_INT_SLONG:
913 		*((long *)vss->iv) = iv.intlong.slong;
914 		break;
915 	case VSD_DTYPE_INT_ULONG:
916 		*((unsigned long *)vss->iv) = iv.intlong.ulong;
917 		break;
918 	case VSD_DTYPE_Q_S32:
919 		*((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
920 		    Q_CTRLINI(info->prec));
921 		break;
922 	case VSD_DTYPE_Q_U32:
923 		*((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
924 		    Q_CTRLINI(info->prec));
925 		break;
926 	case VSD_DTYPE_Q_S64:
927 		*((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
928 		    Q_CTRLINI(info->prec));
929 		break;
930 	case VSD_DTYPE_Q_U64:
931 		*((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
932 		    Q_CTRLINI(info->prec));
933 		break;
934 	default:
935 		break;
936 	}
937 
938 	return (0);
939 }
940 
941 int
942 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
943     struct voistatspec *vss)
944 {
945 	int i, ret;
946 
947 	for (i = nvss - 1; i >= 0; i--) {
948 		if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
949 		    vss[i].hlprinfo)) != 0)
950 			return (ret);
951 	}
952 
953 	return (0);
954 }
955 
956 void
957 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
958 {
959 	int i;
960 
961 	for (i = nvss - 1; i >= 0; i--) {
962 		if (vss[i].hlpr) {
963 			stats_free((void *)vss[i].iv);
964 			vss[i].iv = NULL;
965 		}
966 	}
967 }
968 
969 int
970 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
971 {
972 	int error;
973 
974 	error = 0;
975 
976 	TPL_LIST_WLOCK();
977 	if (tpl_id < 0 || tpl_id >= (int)ntpl) {
978 		error = ENOENT;
979 	} else {
980 		*tpl = tpllist[tpl_id];
981 		/* XXXLAS: Acquire refcount on tpl. */
982 	}
983 	TPL_LIST_WUNLOCK();
984 
985 	return (error);
986 }
987 
988 int
989 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
990 {
991 	int i, tpl_id;
992 
993 	tpl_id = -ESRCH;
994 
995 	TPL_LIST_RLOCK();
996 	for (i = ntpl - 1; i >= 0; i--) {
997 		if (name != NULL) {
998 			if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
999 			    strncmp(name, tpllist[i]->mb->tplname,
1000 			    TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
1001 			    tpllist[i]->mb->tplhash)) {
1002 				tpl_id = i;
1003 				break;
1004 			}
1005 		} else if (hash == tpllist[i]->mb->tplhash) {
1006 			tpl_id = i;
1007 			break;
1008 		}
1009 	}
1010 	TPL_LIST_RUNLOCK();
1011 
1012 	return (tpl_id);
1013 }
1014 
1015 int
1016 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1017 {
1018 	int error;
1019 
1020 	error = 0;
1021 
1022 	TPL_LIST_RLOCK();
1023 	if (tpl_id < ntpl) {
1024 		if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1025 			strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1026 		else
1027 			error = EOVERFLOW;
1028 	} else
1029 		error = ENOENT;
1030 	TPL_LIST_RUNLOCK();
1031 
1032 	return (error);
1033 }
1034 
1035 int
1036 stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
1037     void *seed_bytes, size_t seed_len)
1038 {
1039 	uint32_t cum_pct, rnd_pct;
1040 	int i;
1041 
1042 	cum_pct = 0;
1043 
1044 	/*
1045 	 * Choose a pseudorandom or seeded number in range [0,100] and use
1046 	 * it to make a sampling decision and template selection where required.
1047 	 * If no seed is supplied, a PRNG is used to generate a pseudorandom
1048 	 * number so that every selection is independent. If a seed is supplied,
1049 	 * the caller desires random selection across different seeds, but
1050 	 * deterministic selection given the same seed. This is achieved by
1051 	 * hashing the seed and using the hash as the random number source.
1052 	 *
1053 	 * XXXLAS: Characterise hash function output distribution.
1054 	 */
1055 	if (seed_bytes == NULL)
1056 		rnd_pct = random() / (INT32_MAX / 100);
1057 	else
1058 		rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
1059 		    (UINT32_MAX / 100U);
1060 
1061 	/*
1062 	 * We map the randomly selected percentage on to the interval [0,100]
1063 	 * consisting of the cumulatively summed template sampling percentages.
1064 	 * The difference between the cumulative sum of all template sampling
1065 	 * percentages and 100 is treated as a NULL assignment i.e. no stats
1066 	 * template will be assigned, and -1 returned instead.
1067 	 */
1068 	for (i = 0; i < nrates; i++) {
1069 		cum_pct += rates[i].tpl_sample_pct;
1070 
1071 		KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
1072 		    cum_pct));
1073 		if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
1074 			continue;
1075 
1076 		return (rates[i].tpl_slot_id);
1077 	}
1078 
1079 	return (-1);
1080 }
1081 
1082 int
1083 stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
1084     struct statsblobv1 *src, uint32_t flags)
1085 {
1086 	int error;
1087 
1088 	error = 0;
1089 
1090 	if (src == NULL || dst == NULL ||
1091 	    src->cursz < sizeof(struct statsblob) ||
1092 	    ((flags & SB_CLONE_ALLOCDST) &&
1093 	    (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
1094 		error = EINVAL;
1095 	} else if (flags & SB_CLONE_ALLOCDST) {
1096 		*dst = stats_realloc(NULL, 0, src->cursz, 0);
1097 		if (*dst)
1098 			(*dst)->maxsz = dstmaxsz = src->cursz;
1099 		else
1100 			error = ENOMEM;
1101 	} else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
1102 		error = EINVAL;
1103 	}
1104 
1105 	if (!error) {
1106 		size_t postcurszlen;
1107 
1108 		/*
1109 		 * Clone src into dst except for the maxsz field. If dst is too
1110 		 * small to hold all of src, only copy src's header and return
1111 		 * EOVERFLOW.
1112 		 */
1113 #ifdef _KERNEL
1114 		if (flags & SB_CLONE_USRDSTNOFAULT)
1115 			copyout_nofault(src, *dst,
1116 			    offsetof(struct statsblob, maxsz));
1117 		else if (flags & SB_CLONE_USRDST)
1118 			copyout(src, *dst, offsetof(struct statsblob, maxsz));
1119 		else
1120 #endif
1121 			memcpy(*dst, src, offsetof(struct statsblob, maxsz));
1122 
1123 		if (dstmaxsz >= src->cursz) {
1124 			postcurszlen = src->cursz -
1125 			    offsetof(struct statsblob, cursz);
1126 		} else {
1127 			error = EOVERFLOW;
1128 			postcurszlen = sizeof(struct statsblob) -
1129 			    offsetof(struct statsblob, cursz);
1130 		}
1131 #ifdef _KERNEL
1132 		if (flags & SB_CLONE_USRDSTNOFAULT)
1133 			copyout_nofault(&(src->cursz), &((*dst)->cursz),
1134 			    postcurszlen);
1135 		else if (flags & SB_CLONE_USRDST)
1136 			copyout(&(src->cursz), &((*dst)->cursz), postcurszlen);
1137 		else
1138 #endif
1139 			memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
1140 	}
1141 
1142 	return (error);
1143 }
1144 
1145 int
1146 stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
1147 {
1148 	struct statsblobv1_tpl *tpl, **newtpllist;
1149 	struct statsblobv1 *tpl_sb;
1150 	struct metablob *tpl_mb;
1151 	int tpl_id;
1152 
1153 	if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
1154 		return (-EINVAL);
1155 
1156 	if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
1157 		return (-EEXIST);
1158 
1159 	tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
1160 	tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
1161 	tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);
1162 
1163 	if (tpl_mb != NULL && name != NULL)
1164 		tpl_mb->tplname = stats_strdup(name, 0);
1165 
1166 	if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
1167 	    tpl_mb->tplname == NULL) {
1168 		stats_free(tpl);
1169 		stats_free(tpl_sb);
1170 		if (tpl_mb != NULL) {
1171 			stats_free(tpl_mb->tplname);
1172 			stats_free(tpl_mb);
1173 		}
1174 		return (-ENOMEM);
1175 	}
1176 
1177 	tpl->mb = tpl_mb;
1178 	tpl->sb = tpl_sb;
1179 
1180 	tpl_sb->abi = STATS_ABI_V1;
1181 	tpl_sb->endian =
1182 #if BYTE_ORDER == LITTLE_ENDIAN
1183 	    SB_LE;
1184 #elif BYTE_ORDER == BIG_ENDIAN
1185 	    SB_BE;
1186 #else
1187 	    SB_UE;
1188 #endif
1189 	tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
1190 	tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);
1191 
1192 	TPL_LIST_WLOCK();
1193 	newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
1194 	    (ntpl + 1) * sizeof(void *), 0);
1195 	if (newtpllist != NULL) {
1196 		tpl_id = ntpl++;
1197 		tpllist = (struct statsblob_tpl **)newtpllist;
1198 		tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
1199 		stats_tpl_update_hash(tpllist[tpl_id]);
1200 	} else {
1201 		stats_free(tpl);
1202 		stats_free(tpl_sb);
1203 		if (tpl_mb != NULL) {
1204 			stats_free(tpl_mb->tplname);
1205 			stats_free(tpl_mb);
1206 		}
1207 		tpl_id = -ENOMEM;
1208 	}
1209 	TPL_LIST_WUNLOCK();
1210 
1211 	return (tpl_id);
1212 }
1213 
1214 int
1215 stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
1216     enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
1217     uint32_t flags)
1218 {
1219 	struct voi *voi;
1220 	struct voistat *tmpstat;
1221 	struct statsblobv1 *tpl_sb;
1222 	struct metablob *tpl_mb;
1223 	int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
1224 	    newvoistatdatabytes, newvoistatmaxid;
1225 	uint32_t nbytes;
1226 
1227 	if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
1228 	    nvss == 0 || vss == NULL)
1229 		return (EINVAL);
1230 
1231 	error = nbytes = newvoibytes = newvoistatbytes =
1232 	    newvoistatdatabytes = 0;
1233 	newvoistatmaxid = -1;
1234 
1235 	/* Calculate the number of bytes required for the new voistats. */
1236 	for (i = nvss - 1; i >= 0; i--) {
1237 		if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
1238 		    vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
1239 		    vss[i].iv == NULL || vss[i].vsdsz == 0)
1240 			return (EINVAL);
1241 		if ((int)vss[i].stype > newvoistatmaxid)
1242 			newvoistatmaxid = vss[i].stype;
1243 		newvoistatdatabytes += vss[i].vsdsz;
1244 	}
1245 
1246 	if (flags & SB_VOI_RELUPDATE) {
1247 		/* XXXLAS: VOI state bytes may need to vary based on stat types. */
1248 		newvoistatdatabytes += sizeof(struct voistatdata_voistate);
1249 	}
1250 	nbytes += newvoistatdatabytes;
1251 
1252 	TPL_LIST_WLOCK();
1253 	if (tpl_id < ntpl) {
1254 		tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
1255 		tpl_mb = tpllist[tpl_id]->mb;
1256 
1257 		if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
1258 			/* Adding a new VOI and associated stats. */
1259 			if (voi_id >= NVOIS(tpl_sb)) {
1260 				/* We need to grow the tpl_sb->vois array. */
1261 				newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
1262 				    sizeof(struct voi);
1263 				nbytes += newvoibytes;
1264 			}
1265 			newvoistatbytes =
1266 			    (newvoistatmaxid + 1) * sizeof(struct voistat);
1267 		} else {
1268 			/* Adding stats to an existing VOI. */
1269 			if (newvoistatmaxid >
1270 			    tpl_sb->vois[voi_id].voistatmaxid) {
1271 				newvoistatbytes = (newvoistatmaxid -
1272 				    tpl_sb->vois[voi_id].voistatmaxid) *
1273 				    sizeof(struct voistat);
1274 			}
1275 			/* XXXLAS: KPI does not yet support expanding VOIs. */
1276 			error = EOPNOTSUPP;
1277 		}
1278 		nbytes += newvoistatbytes;
1279 
1280 		if (!error && newvoibytes > 0) {
1281 			struct voi_meta *voi_meta = tpl_mb->voi_meta;
1282 
1283 			voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
1284 			    0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
1285 			    (1 + voi_id) * sizeof(struct voi_meta),
1286 			    M_ZERO);
1287 
1288 			if (voi_meta == NULL)
1289 				error = ENOMEM;
1290 			else
1291 				tpl_mb->voi_meta = voi_meta;
1292 		}
1293 
1294 		if (!error) {
1295 			/* NB: Resizing can change where tpl_sb points. */
1296 			error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
1297 			    newvoistatbytes, newvoistatdatabytes);
1298 		}
1299 
1300 		if (!error) {
1301 			tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
1302 			    0);
1303 			if (tpl_mb->voi_meta[voi_id].name == NULL)
1304 				error = ENOMEM;
1305 		}
1306 
1307 		if (!error) {
1308 			/* Update the template list with the resized pointer. */
1309 			tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;
1310 
1311 			/* Update the template. */
1312 			voi = &tpl_sb->vois[voi_id];
1313 
1314 			if (voi->id < 0) {
1315 				/* VOI is new and needs to be initialised. */
1316 				voi->id = voi_id;
1317 				voi->dtype = voi_dtype;
1318 				voi->stats_off = tpl_sb->stats_off;
1319 				if (flags & SB_VOI_RELUPDATE)
1320 					voi->flags |= VOI_REQSTATE;
1321 			} else {
1322 				/*
1323 				 * XXXLAS: When this else block is written, the
1324 				 * "KPI does not yet support expanding VOIs"
1325 				 * error earlier in this function can be
1326 				 * removed. What is required here is to shuffle
1327 				 * the voistat array such that the new stats for
1328 				 * the voi are contiguous, which will displace
1329 				 * stats for other vois that reside after the
1330 				 * voi being updated. The other vois then need
1331 				 * to have their stats_off adjusted post
1332 				 * shuffle.
1333 				 */
1334 			}
1335 
1336 			voi->voistatmaxid = newvoistatmaxid;
1337 			newstatdataidx = 0;
1338 
1339 			if (voi->flags & VOI_REQSTATE) {
1340 				/* Initialise the voistate stat in slot 0. */
1341 				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
1342 				tmpstat->stype = VS_STYPE_VOISTATE;
1343 				tmpstat->flags = 0;
1344 				tmpstat->dtype = VSD_DTYPE_VOISTATE;
1345 				newstatdataidx = tmpstat->dsz =
1346 				    sizeof(struct voistatdata_numeric);
1347 				tmpstat->data_off = tpl_sb->statsdata_off;
1348 			}
1349 
1350 			for (i = 0; (uint32_t)i < nvss; i++) {
1351 				tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
1352 				    (vss[i].stype * sizeof(struct voistat)));
1353 				KASSERT(tmpstat->stype < 0, ("voistat %p "
1354 				    "already initialised", tmpstat));
1355 				tmpstat->stype = vss[i].stype;
1356 				tmpstat->flags = vss[i].flags;
1357 				tmpstat->dtype = vss[i].vs_dtype;
1358 				tmpstat->dsz = vss[i].vsdsz;
1359 				tmpstat->data_off = tpl_sb->statsdata_off +
1360 				    newstatdataidx;
1361 				memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
1362 				    vss[i].iv, vss[i].vsdsz);
1363 				newstatdataidx += vss[i].vsdsz;
1364 			}
1365 
1366 			/* Update the template version hash. */
1367 			stats_tpl_update_hash(tpllist[tpl_id]);
1368 			/* XXXLAS: Confirm tpl name/hash pair remains unique. */
1369 		}
1370 	} else
1371 		error = EINVAL;
1372 	TPL_LIST_WUNLOCK();
1373 
1374 	return (error);
1375 }
1376 
1377 struct statsblobv1 *
1378 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1379 {
1380 	struct statsblobv1 *sb;
1381 	int error;
1382 
1383 	sb = NULL;
1384 
1385 	TPL_LIST_RLOCK();
1386 	if (tpl_id < ntpl) {
1387 		sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1388 		if (sb != NULL) {
1389 			sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1390 			error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1391 		} else
1392 			error = ENOMEM;
1393 
1394 		if (error) {
1395 			stats_free(sb);
1396 			sb = NULL;
1397 		}
1398 	}
1399 	TPL_LIST_RUNLOCK();
1400 
1401 	return (sb);
1402 }
1403 
/*
 * Release the blob sb by handing it to stats_free().
 */
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{
	stats_free(sb);
}
1410 
1411 int
1412 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1413     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1414     size_t *retvsdsz)
1415 {
1416 	struct voi *v;
1417 	struct voistat *vs;
1418 
1419 	if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1420 	    voi_id >= NVOIS(sb))
1421 		return (EINVAL);
1422 
1423 	v = &sb->vois[voi_id];
1424 	if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1425 		return (EINVAL);
1426 
1427 	vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1428 	*retvsd = BLOB_OFFSET(sb, vs->data_off);
1429 	if (retdtype != NULL)
1430 		*retdtype = vs->dtype;
1431 	if (retvsdsz != NULL)
1432 		*retvsdsz = vs->dsz;
1433 
1434 	return (0);
1435 }
1436 
1437 int
1438 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1439 {
1440 	int error;
1441 
1442 	error = 0;
1443 
1444 	TPL_LIST_RLOCK();
1445 	if (sb == NULL || tpl_id >= ntpl) {
1446 		error = EINVAL;
1447 	} else {
1448 		error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1449 	}
1450 	TPL_LIST_RUNLOCK();
1451 
1452 	return (error);
1453 }
1454 
1455 static inline int
1456 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1457     uint32_t flags __unused)
1458 {
1459 	int error;
1460 
1461 	TPL_LIST_RLOCK_ASSERT();
1462 	error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1463 	KASSERT(!error,
1464 	    ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1465 
1466 	if (!error) {
1467 		memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1468 		sb->created = sb->lastrst = stats_sbinuptime();
1469 		sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1470 	}
1471 
1472 	return (error);
1473 }
1474 
1475 static int
1476 stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
1477     int newvoistatbytes, int newvoistatdatabytes)
1478 {
1479 	struct statsblobv1 *sb;
1480 	struct voi *tmpvoi;
1481 	struct voistat *tmpvoistat, *voistat_array;
1482 	int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;
1483 
1484 	KASSERT(newvoibytes % sizeof(struct voi) == 0,
1485 	    ("Bad newvoibytes %d", newvoibytes));
1486 	KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
1487 	    ("Bad newvoistatbytes %d", newvoistatbytes));
1488 
1489 	error = ((newvoibytes % sizeof(struct voi) == 0) &&
1490 	    (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
1491 	sb = *sbpp;
1492 	nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;
1493 
1494 	/*
1495 	 * XXXLAS: Required until we gain support for flags which alter the
1496 	 * units of size/offset fields in key structs.
1497 	 */
1498 	if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
1499 		error = EFBIG;
1500 
1501 	if (!error && (sb->cursz + nbytes > sb->maxsz)) {
1502 		/* Need to expand our blob. */
1503 		sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
1504 		if (sb != NULL) {
1505 			sb->maxsz = sb->cursz + nbytes;
1506 			*sbpp = sb;
1507 		} else
1508 		    error = ENOMEM;
1509 	}
1510 
1511 	if (!error) {
1512 		/*
1513 		 * Shuffle memory within the expanded blob working from the end
1514 		 * backwards, leaving gaps for the new voistat and voistatdata
1515 		 * structs at the beginning of their respective blob regions,
1516 		 * and for the new voi structs at the end of their blob region.
1517 		 */
1518 		memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
1519 		    BLOB_OFFSET(sb, sb->statsdata_off),
1520 		    sb->cursz - sb->statsdata_off);
1521 		memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
1522 		    newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
1523 		    sb->statsdata_off - sb->stats_off);
1524 
1525 		/* First index of new voi/voistat structs to be initialised. */
1526 		idxnewvois = NVOIS(sb);
1527 		idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;
1528 
1529 		/* Update housekeeping variables and offsets. */
1530 		sb->cursz += nbytes;
1531 		sb->stats_off += newvoibytes;
1532 		sb->statsdata_off += newvoibytes + newvoistatbytes;
1533 
1534 		/* XXXLAS: Zeroing not strictly needed but aids debugging. */
1535 		memset(&sb->vois[idxnewvois], '\0', newvoibytes);
1536 		memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
1537 		    newvoistatbytes);
1538 		memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
1539 		    newvoistatdatabytes);
1540 
1541 		/* Initialise new voi array members and update offsets. */
1542 		for (i = 0; i < NVOIS(sb); i++) {
1543 			tmpvoi = &sb->vois[i];
1544 			if (i >= idxnewvois) {
1545 				tmpvoi->id = tmpvoi->voistatmaxid = -1;
1546 			} else if (tmpvoi->id > -1) {
1547 				tmpvoi->stats_off += newvoibytes +
1548 				    newvoistatbytes;
1549 			}
1550 		}
1551 
1552 		/* Initialise new voistat array members and update offsets. */
1553 		nvoistats = (sb->statsdata_off - sb->stats_off) /
1554 		    sizeof(struct voistat);
1555 		voistat_array = BLOB_OFFSET(sb, sb->stats_off);
1556 		for (i = 0; i < nvoistats; i++) {
1557 			tmpvoistat = &voistat_array[i];
1558 			if (i <= idxnewvoistats) {
1559 				tmpvoistat->stype = -1;
1560 			} else if (tmpvoistat->stype > -1) {
1561 				tmpvoistat->data_off += nbytes;
1562 			}
1563 		}
1564 	}
1565 
1566 	return (error);
1567 }
1568 
1569 static void
1570 stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
1571 {
1572 
1573 	/* XXXLAS: Fill this in. */
1574 }
1575 
1576 static void
1577 stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
1578     void *usrctx, uint32_t flags)
1579 {
1580 	struct voi *v;
1581 	struct voistat *vs;
1582 	struct sb_iter_ctx ctx;
1583 	int i, j, firstvoi;
1584 
1585 	ctx.usrctx = usrctx;
1586 	ctx.flags = SB_IT_FIRST_CB;
1587 	firstvoi = 1;
1588 
1589 	for (i = 0; i < NVOIS(sb); i++) {
1590 		v = &sb->vois[i];
1591 		ctx.vslot = i;
1592 		ctx.vsslot = -1;
1593 		ctx.flags |= SB_IT_FIRST_VOISTAT;
1594 
1595 		if (firstvoi)
1596 			ctx.flags |= SB_IT_FIRST_VOI;
1597 		else if (i == (NVOIS(sb) - 1))
1598 			ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;
1599 
1600 		if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
1601 			if (icb(sb, v, NULL, &ctx))
1602 				return;
1603 			firstvoi = 0;
1604 			ctx.flags &= ~SB_IT_FIRST_CB;
1605 		}
1606 
1607 		/* If NULL voi, v->voistatmaxid == -1 */
1608 		for (j = 0; j <= v->voistatmaxid; j++) {
1609 			vs = &((struct voistat *)BLOB_OFFSET(sb,
1610 			    v->stats_off))[j];
1611 			if (vs->stype < 0 &&
1612 			    !(flags & SB_IT_NULLVOISTAT))
1613 				continue;
1614 
1615 			if (j == v->voistatmaxid) {
1616 				ctx.flags |= SB_IT_LAST_VOISTAT;
1617 				if (i == (NVOIS(sb) - 1))
1618 					ctx.flags |=
1619 					    SB_IT_LAST_CB;
1620 			} else
1621 				ctx.flags &= ~SB_IT_LAST_CB;
1622 
1623 			ctx.vsslot = j;
1624 			if (icb(sb, v, vs, &ctx))
1625 				return;
1626 
1627 			ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
1628 			    SB_IT_LAST_VOISTAT);
1629 		}
1630 		ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
1631 	}
1632 }
1633 
1634 static inline void
1635 stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
1636     const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
1637     size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1638 {
1639 	const struct ctdth32 *ctd32tree;
1640 	const struct ctdth64 *ctd64tree;
1641 	const struct voistatdata_tdgstctd32 *ctd32;
1642 	const struct voistatdata_tdgstctd64 *ctd64;
1643 	const char *fmtstr;
1644 	uint64_t smplcnt, compcnt;
1645 	int is32bit, qmaxstrlen;
1646 	uint16_t maxctds, curctds;
1647 
1648 	switch (tdgst_dtype) {
1649 	case VSD_DTYPE_TDGSTCLUST32:
1650 		smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
1651 		compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
1652 		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1653 		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1654 		ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
1655 		ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
1656 		    ARB_CMIN(ctdth32, ctd32tree));
1657 		qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
1658 		is32bit = 1;
1659 		ctd64tree = NULL;
1660 		ctd64 = NULL;
1661 		break;
1662 	case VSD_DTYPE_TDGSTCLUST64:
1663 		smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
1664 		compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
1665 		maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1666 		curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1667 		ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
1668 		ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
1669 		    ARB_CMIN(ctdth64, ctd64tree));
1670 		qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
1671 		is32bit = 0;
1672 		ctd32tree = NULL;
1673 		ctd32 = NULL;
1674 		break;
1675 	default:
1676 		return;
1677 	}
1678 
1679 	switch (fmt) {
1680 	case SB_STRFMT_FREEFORM:
1681 		fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
1682 		break;
1683 	case SB_STRFMT_JSON:
1684 	default:
1685 		fmtstr =
1686 		    "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
1687 		    "\"nctds\":%hu,\"ctds\":[";
1688 		break;
1689 	}
1690 	sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
1691 	    maxctds, curctds);
1692 
1693 	while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
1694 		char qstr[qmaxstrlen];
1695 
1696 		switch (fmt) {
1697 		case SB_STRFMT_FREEFORM:
1698 			fmtstr = "\n\t\t\t\t";
1699 			break;
1700 		case SB_STRFMT_JSON:
1701 		default:
1702 			fmtstr = "{";
1703 			break;
1704 		}
1705 		sbuf_cat(buf, fmtstr);
1706 
1707 		if (objdump) {
1708 			switch (fmt) {
1709 			case SB_STRFMT_FREEFORM:
1710 				fmtstr = "ctd[%hu].";
1711 				break;
1712 			case SB_STRFMT_JSON:
1713 			default:
1714 				fmtstr = "\"ctd\":%hu,";
1715 				break;
1716 			}
1717 			sbuf_printf(buf, fmtstr, is32bit ?
1718 			    ARB_SELFIDX(ctd32tree, ctd32) :
1719 			    ARB_SELFIDX(ctd64tree, ctd64));
1720 		}
1721 
1722 		switch (fmt) {
1723 		case SB_STRFMT_FREEFORM:
1724 			fmtstr = "{mu=";
1725 			break;
1726 		case SB_STRFMT_JSON:
1727 		default:
1728 			fmtstr = "\"mu\":";
1729 			break;
1730 		}
1731 		sbuf_cat(buf, fmtstr);
1732 		Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
1733 		    sizeof(qstr));
1734 		sbuf_cat(buf, qstr);
1735 
1736 		switch (fmt) {
1737 		case SB_STRFMT_FREEFORM:
1738 			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1739 			break;
1740 		case SB_STRFMT_JSON:
1741 		default:
1742 			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1743 			break;
1744 		}
1745 		sbuf_printf(buf, fmtstr,
1746 		    is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);
1747 
1748 		if (is32bit)
1749 			ctd32 = (objdump ? ARB_CNODE(ctd32tree,
1750 			    ARB_SELFIDX(ctd32tree, ctd32) + 1) :
1751 			    ARB_CNEXT(ctdth32, ctd32tree, ctd32));
1752 		else
1753 			ctd64 = (objdump ? ARB_CNODE(ctd64tree,
1754 			    ARB_SELFIDX(ctd64tree, ctd64) + 1) :
1755 			    ARB_CNEXT(ctdth64, ctd64tree, ctd64));
1756 
1757 		if (fmt == SB_STRFMT_JSON &&
1758 		    (is32bit ? NULL != ctd32 : NULL != ctd64))
1759 			sbuf_putc(buf, ',');
1760 	}
1761 	if (fmt == SB_STRFMT_JSON)
1762 		sbuf_cat(buf, "]");
1763 }
1764 
1765 static inline void
1766 stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
1767     const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
1768     size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1769 {
1770 	const struct voistatdata_numeric *bkt_lb, *bkt_ub;
1771 	const char *fmtstr;
1772 	int is32bit;
1773 	uint16_t i, nbkts;
1774 
1775 	switch (hist_dtype) {
1776 	case VSD_DTYPE_CRHIST32:
1777 		nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
1778 		is32bit = 1;
1779 		break;
1780 	case VSD_DTYPE_DRHIST32:
1781 		nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
1782 		is32bit = 1;
1783 		break;
1784 	case VSD_DTYPE_DVHIST32:
1785 		nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
1786 		is32bit = 1;
1787 		break;
1788 	case VSD_DTYPE_CRHIST64:
1789 		nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
1790 		is32bit = 0;
1791 		break;
1792 	case VSD_DTYPE_DRHIST64:
1793 		nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
1794 		is32bit = 0;
1795 		break;
1796 	case VSD_DTYPE_DVHIST64:
1797 		nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
1798 		is32bit = 0;
1799 		break;
1800 	default:
1801 		return;
1802 	}
1803 
1804 	switch (fmt) {
1805 	case SB_STRFMT_FREEFORM:
1806 		fmtstr = "nbkts=%hu, ";
1807 		break;
1808 	case SB_STRFMT_JSON:
1809 	default:
1810 		fmtstr = "\"nbkts\":%hu,";
1811 		break;
1812 	}
1813 	sbuf_printf(buf, fmtstr, nbkts);
1814 
1815 	switch (fmt) {
1816 		case SB_STRFMT_FREEFORM:
1817 			fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
1818 			break;
1819 		case SB_STRFMT_JSON:
1820 		default:
1821 			fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
1822 			    "\"oob\":%ju,\"bkts\":[");
1823 			break;
1824 	}
1825 	sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
1826 	    hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
1827 	    hist_dtype, oob));
1828 
1829 	for (i = 0; i < nbkts; i++) {
1830 		switch (hist_dtype) {
1831 		case VSD_DTYPE_CRHIST32:
1832 		case VSD_DTYPE_CRHIST64:
1833 			bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
1834 			    bkts[i].lb);
1835 			if (i < nbkts - 1)
1836 				bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
1837 				    hist_dtype, bkts[i + 1].lb);
1838 			else
1839 				bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
1840 			break;
1841 		case VSD_DTYPE_DRHIST32:
1842 		case VSD_DTYPE_DRHIST64:
1843 			bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1844 			    bkts[i].lb);
1845 			bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1846 			    bkts[i].ub);
1847 			break;
1848 		case VSD_DTYPE_DVHIST32:
1849 		case VSD_DTYPE_DVHIST64:
1850 			bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
1851 			    hist_dtype, bkts[i].val);
1852 			break;
1853 		default:
1854 			break;
1855 		}
1856 
1857 		switch (fmt) {
1858 		case SB_STRFMT_FREEFORM:
1859 			fmtstr = "\n\t\t\t\t";
1860 			break;
1861 		case SB_STRFMT_JSON:
1862 		default:
1863 			fmtstr = "{";
1864 			break;
1865 		}
1866 		sbuf_cat(buf, fmtstr);
1867 
1868 		if (objdump) {
1869 			switch (fmt) {
1870 			case SB_STRFMT_FREEFORM:
1871 				fmtstr = "bkt[%hu].";
1872 				break;
1873 			case SB_STRFMT_JSON:
1874 			default:
1875 				fmtstr = "\"bkt\":%hu,";
1876 				break;
1877 			}
1878 			sbuf_printf(buf, fmtstr, i);
1879 		}
1880 
1881 		switch (fmt) {
1882 		case SB_STRFMT_FREEFORM:
1883 			fmtstr = "{lb=";
1884 			break;
1885 		case SB_STRFMT_JSON:
1886 		default:
1887 			fmtstr = "\"lb\":";
1888 			break;
1889 		}
1890 		sbuf_cat(buf, fmtstr);
1891 		stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
1892 		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1893 		    fmt, buf, objdump);
1894 
1895 		switch (fmt) {
1896 		case SB_STRFMT_FREEFORM:
1897 			fmtstr = ",ub=";
1898 			break;
1899 		case SB_STRFMT_JSON:
1900 		default:
1901 			fmtstr = ",\"ub\":";
1902 			break;
1903 		}
1904 		sbuf_cat(buf, fmtstr);
1905 		stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
1906 		    voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1907 		    fmt, buf, objdump);
1908 
1909 		switch (fmt) {
1910 		case SB_STRFMT_FREEFORM:
1911 			fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1912 			break;
1913 		case SB_STRFMT_JSON:
1914 		default:
1915 			fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1916 			break;
1917 		}
1918 		sbuf_printf(buf, fmtstr, is32bit ?
1919 		    VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
1920 		    (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
1921 		    bkts[i].cnt));
1922 
1923 		if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
1924 			sbuf_putc(buf, ',');
1925 	}
1926 	if (fmt == SB_STRFMT_JSON)
1927 		sbuf_cat(buf, "]");
1928 }
1929 
1930 int
1931 stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
1932     enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
1933     struct sbuf *buf, int objdump)
1934 {
1935 	const char *fmtstr;
1936 
1937 	if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
1938 	    vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
1939 		return (EINVAL);
1940 
1941 	switch (vsd_dtype) {
1942 	case VSD_DTYPE_VOISTATE:
1943 		switch (fmt) {
1944 		case SB_STRFMT_FREEFORM:
1945 			fmtstr = "prev=";
1946 			break;
1947 		case SB_STRFMT_JSON:
1948 		default:
1949 			fmtstr = "\"prev\":";
1950 			break;
1951 		}
1952 		sbuf_cat(buf, fmtstr);
1953 		/*
1954 		 * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
1955 		 */
1956 		stats_voistatdata_tostr(
1957 		    (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
1958 		    voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
1959 		break;
1960 	case VSD_DTYPE_INT_S32:
1961 		sbuf_printf(buf, "%d", vsd->int32.s32);
1962 		break;
1963 	case VSD_DTYPE_INT_U32:
1964 		sbuf_printf(buf, "%u", vsd->int32.u32);
1965 		break;
1966 	case VSD_DTYPE_INT_S64:
1967 		sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
1968 		break;
1969 	case VSD_DTYPE_INT_U64:
1970 		sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
1971 		break;
1972 	case VSD_DTYPE_INT_SLONG:
1973 		sbuf_printf(buf, "%ld", vsd->intlong.slong);
1974 		break;
1975 	case VSD_DTYPE_INT_ULONG:
1976 		sbuf_printf(buf, "%lu", vsd->intlong.ulong);
1977 		break;
1978 	case VSD_DTYPE_Q_S32:
1979 		{
1980 		char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
1981 		Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
1982 		sbuf_cat(buf, qstr);
1983 		}
1984 		break;
1985 	case VSD_DTYPE_Q_U32:
1986 		{
1987 		char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
1988 		Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
1989 		sbuf_cat(buf, qstr);
1990 		}
1991 		break;
1992 	case VSD_DTYPE_Q_S64:
1993 		{
1994 		char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
1995 		Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
1996 		sbuf_cat(buf, qstr);
1997 		}
1998 		break;
1999 	case VSD_DTYPE_Q_U64:
2000 		{
2001 		char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
2002 		Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
2003 		sbuf_cat(buf, qstr);
2004 		}
2005 		break;
2006 	case VSD_DTYPE_CRHIST32:
2007 	case VSD_DTYPE_DRHIST32:
2008 	case VSD_DTYPE_DVHIST32:
2009 	case VSD_DTYPE_CRHIST64:
2010 	case VSD_DTYPE_DRHIST64:
2011 	case VSD_DTYPE_DVHIST64:
2012 		stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
2013 		    vsd_dtype, vsd_sz, fmt, buf, objdump);
2014 		break;
2015 	case VSD_DTYPE_TDGSTCLUST32:
2016 	case VSD_DTYPE_TDGSTCLUST64:
2017 		stats_voistatdata_tdgst_tostr(voi_dtype,
2018 		    CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
2019 		    objdump);
2020 		break;
2021 	default:
2022 		break;
2023 	}
2024 
2025 	return (sbuf_error(buf));
2026 }
2027 
2028 static void
2029 stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
2030     struct voistat *vs, struct sb_iter_ctx *ctx)
2031 {
2032 	struct sb_tostrcb_ctx *sctx;
2033 	struct metablob *tpl_mb;
2034 	struct sbuf *buf;
2035 	void *vsd;
2036 	uint8_t dump;
2037 
2038 	sctx = ctx->usrctx;
2039 	buf = sctx->buf;
2040 	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2041 	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2042 
2043 	if (ctx->flags & SB_IT_FIRST_CB) {
2044 		sbuf_printf(buf, "struct statsblobv1@%p", sb);
2045 		if (dump) {
2046 			sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
2047 			    "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
2048 			    "stats_off=%hu, statsdata_off=%hu",
2049 			    sb->abi, sb->endian, sb->maxsz, sb->cursz,
2050 			    sb->created, sb->lastrst, sb->flags, sb->stats_off,
2051 			    sb->statsdata_off);
2052 		}
2053 		sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
2054 	}
2055 
2056 	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2057 		sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
2058 		if (v->id < 0)
2059 			return;
2060 		sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
2061 		    tpl_mb->voi_meta[v->id].name);
2062 		if (dump)
2063 		    sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
2064 		    "voistatmaxid=%hhd, stats_off=%hu", v->flags,
2065 		    vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
2066 	}
2067 
2068 	if (!dump && vs->stype <= 0)
2069 		return;
2070 
2071 	sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
2072 	if (vs->stype < 0) {
2073 		sbuf_printf(buf, "%hhd", vs->stype);
2074 		return;
2075 	} else
2076 		sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
2077 		    vs->errs);
2078 	vsd = BLOB_OFFSET(sb, vs->data_off);
2079 	if (dump)
2080 		sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
2081 		    "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
2082 		    vs->dsz, vs->data_off);
2083 
2084 	sbuf_printf(buf, "\n\t\t\tvoistatdata: ");
2085 	stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2086 	    sctx->fmt, buf, dump);
2087 }
2088 
2089 static void
2090 stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2091     struct sb_iter_ctx *ctx)
2092 {
2093 	struct sb_tostrcb_ctx *sctx;
2094 	struct metablob *tpl_mb;
2095 	struct sbuf *buf;
2096 	const char *fmtstr;
2097 	void *vsd;
2098 	uint8_t dump;
2099 
2100 	sctx = ctx->usrctx;
2101 	buf = sctx->buf;
2102 	tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2103 	dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2104 
2105 	if (ctx->flags & SB_IT_FIRST_CB) {
2106 		sbuf_putc(buf, '{');
2107 		if (dump) {
2108 			sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
2109 			    "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
2110 			    "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
2111 			    "\"statsdata_off\":%hu,", sb->abi,
2112 			    sb->endian, sb->maxsz, sb->cursz, sb->created,
2113 			    sb->lastrst, sb->flags, sb->stats_off,
2114 			    sb->statsdata_off);
2115 		}
2116 
2117 		if (tpl_mb == NULL)
2118 			fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
2119 		else
2120 			fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";
2121 
2122 		sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
2123 		    sb->tplhash);
2124 	}
2125 
2126 	if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2127 		if (dump) {
2128 			sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
2129 			    v->id);
2130 			if (v->id < 0) {
2131 				sbuf_printf(buf, "},");
2132 				return;
2133 			}
2134 
2135 			if (tpl_mb == NULL)
2136 				fmtstr = ",\"name\":%s,\"flags\":%hu,"
2137 				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2138 				    "\"stats_off\":%hu,";
2139 			else
2140 				fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
2141 				    "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2142 				    "\"stats_off\":%hu,";
2143 
2144 			sbuf_printf(buf, fmtstr, tpl_mb ?
2145 			    tpl_mb->voi_meta[v->id].name : "null", v->flags,
2146 			    vsd_dtype2name[v->dtype], v->voistatmaxid,
2147 			    v->stats_off);
2148 		} else {
2149 			if (tpl_mb == NULL) {
2150 				sbuf_printf(buf, "\"[%hd]\":{", v->id);
2151 			} else {
2152 				sbuf_printf(buf, "\"%s\":{",
2153 				    tpl_mb->voi_meta[v->id].name);
2154 			}
2155 		}
2156 		sbuf_cat(buf, "\"stats\":{");
2157 	}
2158 
2159 	vsd = BLOB_OFFSET(sb, vs->data_off);
2160 	if (dump) {
2161 		sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
2162 		if (vs->stype < 0) {
2163 			sbuf_printf(buf, "{\"stype\":-1},");
2164 			return;
2165 		}
2166 		sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
2167 		    "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
2168 		    vs_stype2name[vs->stype], vs->errs, vs->flags,
2169 		    vsd_dtype2name[vs->dtype], vs->data_off);
2170 	} else if (vs->stype > 0) {
2171 		if (tpl_mb == NULL)
2172 			sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
2173 		else
2174 			sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
2175 	} else
2176 		return;
2177 
2178 	if ((vs->flags & VS_VSDVALID) || dump) {
2179 		if (!dump)
2180 			sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
2181 		/* Simple non-compound VSD types need a key. */
2182 		if (!vsd_compoundtype[vs->dtype])
2183 			sbuf_cat(buf, "\"val\":");
2184 		stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2185 		    sctx->fmt, buf, dump);
2186 		sbuf_cat(buf, dump ? "}}" : "}");
2187 	} else
2188 		sbuf_cat(buf, dump ? "null}" : "null");
2189 
2190 	if (ctx->flags & SB_IT_LAST_VOISTAT)
2191 		sbuf_cat(buf, "}}");
2192 
2193 	if (ctx->flags & SB_IT_LAST_CB)
2194 		sbuf_cat(buf, "}}");
2195 	else
2196 		sbuf_putc(buf, ',');
2197 }
2198 
2199 static int
2200 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2201     struct sb_iter_ctx *ctx)
2202 {
2203 	struct sb_tostrcb_ctx *sctx;
2204 
2205 	sctx = ctx->usrctx;
2206 
2207 	switch (sctx->fmt) {
2208 	case SB_STRFMT_FREEFORM:
2209 		stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2210 		break;
2211 	case SB_STRFMT_JSON:
2212 		stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2213 		break;
2214 	default:
2215 		break;
2216 	}
2217 
2218 	return (sbuf_error(sctx->buf));
2219 }
2220 
2221 int
2222 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2223     enum sb_str_fmt fmt, uint32_t flags)
2224 {
2225 	struct sb_tostrcb_ctx sctx;
2226 	uint32_t iflags;
2227 
2228 	if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2229 	    fmt >= SB_STRFMT_NUM_FMTS)
2230 		return (EINVAL);
2231 
2232 	sctx.buf = buf;
2233 	sctx.fmt = fmt;
2234 	sctx.flags = flags;
2235 
2236 	if (flags & SB_TOSTR_META) {
2237 		if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2238 		    &sctx.tpl))
2239 			return (EINVAL);
2240 	} else
2241 		sctx.tpl = NULL;
2242 
2243 	iflags = 0;
2244 	if (flags & SB_TOSTR_OBJDUMP)
2245 		iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2246 	stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2247 
2248 	return (sbuf_error(buf));
2249 }
2250 
2251 static int
2252 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2253     struct voistat *vs, struct sb_iter_ctx *ctx)
2254 {
2255 	struct sb_visitcb_ctx *vctx;
2256 	struct sb_visit sbv;
2257 
2258 	vctx = ctx->usrctx;
2259 
2260 	sbv.tplhash = sb->tplhash;
2261 	sbv.voi_id = v->id;
2262 	sbv.voi_dtype = v->dtype;
2263 	sbv.vs_stype = vs->stype;
2264 	sbv.vs_dtype = vs->dtype;
2265 	sbv.vs_dsz = vs->dsz;
2266 	sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2267 	sbv.vs_errs = vs->errs;
2268 	sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2269 	    SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2270 	    SB_IT_LAST_VOISTAT);
2271 
2272 	return (vctx->cb(&sbv, vctx->usrctx));
2273 }
2274 
2275 int
2276 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2277     void *usrctx)
2278 {
2279 	struct sb_visitcb_ctx vctx;
2280 
2281 	if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2282 		return (EINVAL);
2283 
2284 	vctx.cb = func;
2285 	vctx.usrctx = usrctx;
2286 
2287 	stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2288 
2289 	return (0);
2290 }
2291 
/*
 * Blob iterator callback which applies the default reset action to a single
 * VOI stat, selected by the stat's type (vs->stype):
 *
 *   SUM   - value set to 0 (Q types via Q_SIFVAL(), others via bzero()).
 *   MAX   - value set to Q_IFMINVAL() for Q types, otherwise copied from the
 *           LIM_MIN row of numeric_limits, so any later sample replaces it.
 *   MIN   - value set to Q_IFMAXVAL() for Q types, otherwise copied from the
 *           LIM_MAX row of numeric_limits.
 *   HIST  - the out-of-band counter and every bucket counter are zeroed.
 *   TDGST - sample/compression counters are zeroed and the centroid tree is
 *           reinitialised.
 *
 * VS_STYPE_VOISTATE stats are left untouched. After a reset the stat's error
 * counter is cleared and VS_VSDVALID is removed from its flags. A HIST or
 * TDGST stat with an unexpected data type returns early without touching
 * errs/flags. Always returns 0.
 */
static int
stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
    struct voistat *vs, struct sb_iter_ctx *ctx __unused)
{
	void *vsd;

	/* VOI state is not reset. */
	if (vs->stype == VS_STYPE_VOISTATE)
		return (0);

	vsd = BLOB_OFFSET(sb, vs->data_off);

	/* Perform the stat type's default reset action. */
	switch (vs->stype) {
	case VS_STYPE_SUM:
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
			break;
		default:
			/* Non-Q types: zero the whole data region. */
			bzero(vsd, vs->dsz);
			break;
		}
		break;
	case VS_STYPE_MAX:
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMINVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMINVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMINVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMINVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			/* Non-Q types: seed with the data type's LIM_MIN entry. */
			memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	case VS_STYPE_MIN:
		switch (vs->dtype) {
		case VSD_DTYPE_Q_S32:
			Q_SIFVAL(VSD(q32, vsd)->sq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->sq32));
			break;
		case VSD_DTYPE_Q_U32:
			Q_SIFVAL(VSD(q32, vsd)->uq32,
			    Q_IFMAXVAL(VSD(q32, vsd)->uq32));
			break;
		case VSD_DTYPE_Q_S64:
			Q_SIFVAL(VSD(q64, vsd)->sq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->sq64));
			break;
		case VSD_DTYPE_Q_U64:
			Q_SIFVAL(VSD(q64, vsd)->uq64,
			    Q_IFMAXVAL(VSD(q64, vsd)->uq64));
			break;
		default:
			/* Non-Q types: seed with the data type's LIM_MAX entry. */
			memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
			    vs->dsz);
			break;
		}
		break;
	case VS_STYPE_HIST:
		{
		/* Reset bucket counts. */
		struct voistatdata_hist *hist;
		int i, is32bit;
		uint16_t nbkts;

		hist = VSD(hist, vsd);
		/* Derive the bucket count and counter width from the dtype. */
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DRHIST32:
			nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_DVHIST32:
			nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
			is32bit = 1;
			break;
		case VSD_DTYPE_CRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DRHIST64:
			nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
			is32bit = 0;
			break;
		case VSD_DTYPE_DVHIST64:
			nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
			is32bit = 0;
			break;
		default:
			/* Not a histogram dtype: leave the stat as is. */
			return (0);
		}

		/* Zero the out-of-band counter, then each bucket's counter. */
		bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
		    is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
		for (i = nbkts - 1; i >= 0; i--) {
			bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
			    bkts[i].cnt), is32bit ? sizeof(uint32_t) :
			    sizeof(uint64_t));
		}
		break;
		}
	case VS_STYPE_TDGST:
		{
		/* Reset sample count centroids array/tree. */
		struct voistatdata_tdgst *tdgst;
		struct ctdth32 *ctd32tree;
		struct ctdth64 *ctd64tree;
		struct voistatdata_tdgstctd32 *ctd32;
		struct voistatdata_tdgstctd64 *ctd64;

		tdgst = VSD(tdgst, vsd);
		switch (vs->dtype) {
		case VSD_DTYPE_TDGSTCLUST32:
			VSD(tdgstclust32, tdgst)->smplcnt = 0;
			VSD(tdgstclust32, tdgst)->compcnt = 0;
			ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
			/*
			 * The block following ARB_INIT() clears each centroid
			 * (presumably run once per tree node - see arb.h).
			 */
			ARB_INIT(ctd32, ctdlnk, ctd32tree,
			    ARB_MAXNODES(ctd32tree)) {
				ctd32->cnt = 0;
				Q_SIFVAL(ctd32->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
		case VSD_DTYPE_TDGSTCLUST64:
			VSD(tdgstclust64, tdgst)->smplcnt = 0;
			VSD(tdgstclust64, tdgst)->compcnt = 0;
			ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
			/* As for the 32-bit variant above. */
			ARB_INIT(ctd64, ctdlnk, ctd64tree,
			    ARB_MAXNODES(ctd64tree)) {
				ctd64->cnt = 0;
				Q_SIFVAL(ctd64->mu, 0);
			}
#ifdef DIAGNOSTIC
			RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
		default:
			/* Not a t-digest dtype: leave the stat as is. */
			return (0);
		}
		break;
		}
	default:
		KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
		break;
	}

	/* The stat holds no valid data or recorded errors after a reset. */
	vs->errs = 0;
	vs->flags &= ~VS_VSDVALID;

	return (0);
}
2470 
2471 int
2472 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2473     struct statsblobv1 *src, uint32_t flags)
2474 {
2475 	int error;
2476 
2477 	if (src != NULL && src->abi == STATS_ABI_V1) {
2478 		error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2479 		if (!error) {
2480 			if (flags & SB_CLONE_RSTSRC) {
2481 				stats_v1_blob_iter(src,
2482 				    stats_v1_icb_reset_voistat, NULL, 0);
2483 				src->lastrst = stats_sbinuptime();
2484 			}
2485 			stats_v1_blob_finalise(*dst);
2486 		}
2487 	} else
2488 		error = EINVAL;
2489 
2490 	return (error);
2491 }
2492 
2493 static inline int
2494 stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
2495     struct voistatdata *voival, struct voistat *vs, void *vsd)
2496 {
2497 	int error;
2498 
2499 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2500 	    ("Unknown VSD dtype %d", vs->dtype));
2501 
2502 	error = 0;
2503 
2504 	switch (vs->dtype) {
2505 	case VSD_DTYPE_INT_S32:
2506 		if (VSD(int32, vsd)->s32 < voival->int32.s32) {
2507 			VSD(int32, vsd)->s32 = voival->int32.s32;
2508 			vs->flags |= VS_VSDVALID;
2509 		}
2510 		break;
2511 	case VSD_DTYPE_INT_U32:
2512 		if (VSD(int32, vsd)->u32 < voival->int32.u32) {
2513 			VSD(int32, vsd)->u32 = voival->int32.u32;
2514 			vs->flags |= VS_VSDVALID;
2515 		}
2516 		break;
2517 	case VSD_DTYPE_INT_S64:
2518 		if (VSD(int64, vsd)->s64 < voival->int64.s64) {
2519 			VSD(int64, vsd)->s64 = voival->int64.s64;
2520 			vs->flags |= VS_VSDVALID;
2521 		}
2522 		break;
2523 	case VSD_DTYPE_INT_U64:
2524 		if (VSD(int64, vsd)->u64 < voival->int64.u64) {
2525 			VSD(int64, vsd)->u64 = voival->int64.u64;
2526 			vs->flags |= VS_VSDVALID;
2527 		}
2528 		break;
2529 	case VSD_DTYPE_INT_SLONG:
2530 		if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
2531 			VSD(intlong, vsd)->slong = voival->intlong.slong;
2532 			vs->flags |= VS_VSDVALID;
2533 		}
2534 		break;
2535 	case VSD_DTYPE_INT_ULONG:
2536 		if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
2537 			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2538 			vs->flags |= VS_VSDVALID;
2539 		}
2540 		break;
2541 	case VSD_DTYPE_Q_S32:
2542 		if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2543 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2544 		    voival->q32.sq32)))) {
2545 			vs->flags |= VS_VSDVALID;
2546 		}
2547 		break;
2548 	case VSD_DTYPE_Q_U32:
2549 		if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2550 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2551 		    voival->q32.uq32)))) {
2552 			vs->flags |= VS_VSDVALID;
2553 		}
2554 		break;
2555 	case VSD_DTYPE_Q_S64:
2556 		if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2557 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2558 		    voival->q64.sq64)))) {
2559 			vs->flags |= VS_VSDVALID;
2560 		}
2561 		break;
2562 	case VSD_DTYPE_Q_U64:
2563 		if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2564 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2565 		    voival->q64.uq64)))) {
2566 			vs->flags |= VS_VSDVALID;
2567 		}
2568 		break;
2569 	default:
2570 		error = EINVAL;
2571 		break;
2572 	}
2573 
2574 	return (error);
2575 }
2576 
2577 static inline int
2578 stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
2579     struct voistatdata *voival, struct voistat *vs, void *vsd)
2580 {
2581 	int error;
2582 
2583 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2584 	    ("Unknown VSD dtype %d", vs->dtype));
2585 
2586 	error = 0;
2587 
2588 	switch (vs->dtype) {
2589 	case VSD_DTYPE_INT_S32:
2590 		if (VSD(int32, vsd)->s32 > voival->int32.s32) {
2591 			VSD(int32, vsd)->s32 = voival->int32.s32;
2592 			vs->flags |= VS_VSDVALID;
2593 		}
2594 		break;
2595 	case VSD_DTYPE_INT_U32:
2596 		if (VSD(int32, vsd)->u32 > voival->int32.u32) {
2597 			VSD(int32, vsd)->u32 = voival->int32.u32;
2598 			vs->flags |= VS_VSDVALID;
2599 		}
2600 		break;
2601 	case VSD_DTYPE_INT_S64:
2602 		if (VSD(int64, vsd)->s64 > voival->int64.s64) {
2603 			VSD(int64, vsd)->s64 = voival->int64.s64;
2604 			vs->flags |= VS_VSDVALID;
2605 		}
2606 		break;
2607 	case VSD_DTYPE_INT_U64:
2608 		if (VSD(int64, vsd)->u64 > voival->int64.u64) {
2609 			VSD(int64, vsd)->u64 = voival->int64.u64;
2610 			vs->flags |= VS_VSDVALID;
2611 		}
2612 		break;
2613 	case VSD_DTYPE_INT_SLONG:
2614 		if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
2615 			VSD(intlong, vsd)->slong = voival->intlong.slong;
2616 			vs->flags |= VS_VSDVALID;
2617 		}
2618 		break;
2619 	case VSD_DTYPE_INT_ULONG:
2620 		if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
2621 			VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2622 			vs->flags |= VS_VSDVALID;
2623 		}
2624 		break;
2625 	case VSD_DTYPE_Q_S32:
2626 		if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2627 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2628 		    voival->q32.sq32)))) {
2629 			vs->flags |= VS_VSDVALID;
2630 		}
2631 		break;
2632 	case VSD_DTYPE_Q_U32:
2633 		if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2634 		    (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2635 		    voival->q32.uq32)))) {
2636 			vs->flags |= VS_VSDVALID;
2637 		}
2638 		break;
2639 	case VSD_DTYPE_Q_S64:
2640 		if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2641 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2642 		    voival->q64.sq64)))) {
2643 			vs->flags |= VS_VSDVALID;
2644 		}
2645 		break;
2646 	case VSD_DTYPE_Q_U64:
2647 		if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2648 		    (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2649 		    voival->q64.uq64)))) {
2650 			vs->flags |= VS_VSDVALID;
2651 		}
2652 		break;
2653 	default:
2654 		error = EINVAL;
2655 		break;
2656 	}
2657 
2658 	return (error);
2659 }
2660 
2661 static inline int
2662 stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
2663     struct voistatdata *voival, struct voistat *vs, void *vsd)
2664 {
2665 	int error;
2666 
2667 	KASSERT(vs->dtype < VSD_NUM_DTYPES,
2668 	    ("Unknown VSD dtype %d", vs->dtype));
2669 
2670 	error = 0;
2671 
2672 	switch (vs->dtype) {
2673 	case VSD_DTYPE_INT_S32:
2674 		VSD(int32, vsd)->s32 += voival->int32.s32;
2675 		break;
2676 	case VSD_DTYPE_INT_U32:
2677 		VSD(int32, vsd)->u32 += voival->int32.u32;
2678 		break;
2679 	case VSD_DTYPE_INT_S64:
2680 		VSD(int64, vsd)->s64 += voival->int64.s64;
2681 		break;
2682 	case VSD_DTYPE_INT_U64:
2683 		VSD(int64, vsd)->u64 += voival->int64.u64;
2684 		break;
2685 	case VSD_DTYPE_INT_SLONG:
2686 		VSD(intlong, vsd)->slong += voival->intlong.slong;
2687 		break;
2688 	case VSD_DTYPE_INT_ULONG:
2689 		VSD(intlong, vsd)->ulong += voival->intlong.ulong;
2690 		break;
2691 	case VSD_DTYPE_Q_S32:
2692 		error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
2693 		break;
2694 	case VSD_DTYPE_Q_U32:
2695 		error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
2696 		break;
2697 	case VSD_DTYPE_Q_S64:
2698 		error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
2699 		break;
2700 	case VSD_DTYPE_Q_U64:
2701 		error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
2702 		break;
2703 	default:
2704 		error = EINVAL;
2705 		break;
2706 	}
2707 
2708 	if (!error)
2709 		vs->flags |= VS_VSDVALID;
2710 
2711 	return (error);
2712 }
2713 
/*
 * Update a histogram VOI stat with the new VOI value 'voival', whose data type
 * is 'voi_dtype'.
 *
 * Buckets are searched linearly from the highest index down to 0 and the
 * first bucket accepting the value has its count incremented:
 *   - CRHIST*: value >= the bucket's lower bound (no upper bound check);
 *   - DRHIST*: bucket's lower bound <= value < bucket's upper bound;
 *   - DVHIST*: value == the bucket's value.
 * If no bucket accepts the value (including when 'voi_dtype' is not one of
 * the handled integer/Q types), the histogram's out-of-band counter is
 * incremented instead.
 *
 * Marks the stat's data as valid. Returns EINVAL if vs->dtype is not a
 * histogram type, otherwise 0.
 */
static inline int
stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
    struct voistat *vs, struct voistatdata_hist *hist)
{
	struct voistatdata_numeric *bkt_lb, *bkt_ub;
	uint64_t *oob64, *cnt64;
	uint32_t *oob32, *cnt32;
	int error, i, found, is32bit, has_ub, eq_only;

	error = 0;

	/*
	 * Per histogram type: number of buckets (into i), counter width,
	 * whether buckets carry an upper bound (has_ub), whether buckets match
	 * on equality only (eq_only), and the out-of-band counter's address.
	 * Only the oob pointer matching the counter width is initialised.
	 */
	switch (vs->dtype) {
	case VSD_DTYPE_CRHIST32:
		i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
		is32bit = 1;
		has_ub = eq_only = 0;
		oob32 = &VSD(crhist32, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST32:
		i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
		is32bit = has_ub = 1;
		eq_only = 0;
		oob32 = &VSD(drhist32, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST32:
		i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
		is32bit = eq_only = 1;
		has_ub = 0;
		oob32 = &VSD(dvhist32, hist)->oob;
		break;
	case VSD_DTYPE_CRHIST64:
		i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
		is32bit = has_ub = eq_only = 0;
		oob64 = &VSD(crhist64, hist)->oob;
		break;
	case VSD_DTYPE_DRHIST64:
		i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
		is32bit = eq_only = 0;
		has_ub = 1;
		oob64 = &VSD(drhist64, hist)->oob;
		break;
	case VSD_DTYPE_DVHIST64:
		i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
		is32bit = has_ub = 0;
		eq_only = 1;
		oob64 = &VSD(dvhist64, hist)->oob;
		break;
	default:
		return (EINVAL);
	}
	i--; /* Adjust for 0-based array index. */

	/* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
	for (found = 0; i >= 0 && !found; i--) {
		/*
		 * Latch pointers to bucket i's bound(s) and counter. bkt_ub is
		 * only set for the DRHIST types, i.e. exactly when has_ub is
		 * set, and is only dereferenced below when has_ub is set.
		 */
		switch (vs->dtype) {
		case VSD_DTYPE_CRHIST32:
			bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
			cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST32:
			bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
			cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST32:
			bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
			cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_CRHIST64:
			bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
			cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DRHIST64:
			bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
			bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
			cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
			break;
		case VSD_DTYPE_DVHIST64:
			bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
			cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
			break;
		default:
			return (EINVAL);
		}

		/*
		 * Test the value against bucket i using the comparison
		 * appropriate for the VOI's data type. The same rule is
		 * applied in every case: value >= lower bound and then either
		 * an exact match (eq_only), or no upper bound to check, or
		 * value < upper bound.
		 */
		switch (voi_dtype) {
		case VSD_DTYPE_INT_S32:
			if (voival->int32.s32 >= bkt_lb->int32.s32) {
				if ((eq_only && voival->int32.s32 ==
				    bkt_lb->int32.s32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.s32 < bkt_ub->int32.s32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_U32:
			if (voival->int32.u32 >= bkt_lb->int32.u32) {
				if ((eq_only && voival->int32.u32 ==
				    bkt_lb->int32.u32) ||
				    (!eq_only && (!has_ub ||
				    voival->int32.u32 < bkt_ub->int32.u32)))
					found = 1;
			}
			break;
		case VSD_DTYPE_INT_S64:
			if (voival->int64.s64 >= bkt_lb->int64.s64)
				if ((eq_only && voival->int64.s64 ==
				    bkt_lb->int64.s64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.s64 < bkt_ub->int64.s64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_U64:
			if (voival->int64.u64 >= bkt_lb->int64.u64)
				if ((eq_only && voival->int64.u64 ==
				    bkt_lb->int64.u64) ||
				    (!eq_only && (!has_ub ||
				    voival->int64.u64 < bkt_ub->int64.u64)))
					found = 1;
			break;
		case VSD_DTYPE_INT_SLONG:
			if (voival->intlong.slong >= bkt_lb->intlong.slong)
				if ((eq_only && voival->intlong.slong ==
				    bkt_lb->intlong.slong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.slong <
				    bkt_ub->intlong.slong)))
					found = 1;
			break;
		case VSD_DTYPE_INT_ULONG:
			if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
				if ((eq_only && voival->intlong.ulong ==
				    bkt_lb->intlong.ulong) ||
				    (!eq_only && (!has_ub ||
				    voival->intlong.ulong <
				    bkt_ub->intlong.ulong)))
					found = 1;
			break;
		case VSD_DTYPE_Q_S32:
			if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
				if ((eq_only && Q_QEQ(voival->q32.sq32,
				    bkt_lb->q32.sq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.sq32,
				    bkt_ub->q32.sq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U32:
			if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
				if ((eq_only && Q_QEQ(voival->q32.uq32,
				    bkt_lb->q32.uq32)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q32.uq32,
				    bkt_ub->q32.uq32))))
					found = 1;
			break;
		case VSD_DTYPE_Q_S64:
			if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
				if ((eq_only && Q_QEQ(voival->q64.sq64,
				    bkt_lb->q64.sq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.sq64,
				    bkt_ub->q64.sq64))))
					found = 1;
			break;
		case VSD_DTYPE_Q_U64:
			if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
				if ((eq_only && Q_QEQ(voival->q64.uq64,
				    bkt_lb->q64.uq64)) ||
				    (!eq_only && (!has_ub ||
				    Q_QLTQ(voival->q64.uq64,
				    bkt_ub->q64.uq64))))
					found = 1;
			break;
		default:
			/* Unhandled VOI dtype: no bucket can match. */
			break;
		}
	}

	/*
	 * On a match the cnt pointer still refers to the matching bucket, as
	 * it was latched before the loop's final decrement of i.
	 */
	if (found) {
		if (is32bit)
			*cnt32 += 1;
		else
			*cnt64 += 1;
	} else {
		if (is32bit)
			*oob32 += 1;
		else
			*oob64 += 1;
	}

	vs->flags |= VS_VSDVALID;
	return (error);
}
2908 
/*
 * Compress a t-digest whose centroid ARB is full by rebuilding it in place.
 *
 * If the ARB is not full nothing is done and 0 is returned. Otherwise the
 * digest's compression counter is incremented, the tree and sample count are
 * reset, all centroids are placed on the ARB free list in pseudorandom order,
 * and each centroid's mu/cnt is then re-added via stats_v1_vsd_tdgst_add()
 * ('attempt' is passed through unchanged).
 *
 * Returns EINVAL if 'vs_dtype' is not a t-digest type, otherwise 0 or the most
 * recent non-zero error produced while re-adding centroids.
 */
static inline int
stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, int attempt)
{
	struct ctdth32 *ctd32tree;
	struct ctdth64 *ctd64tree;
	struct voistatdata_tdgstctd32 *ctd32;
	struct voistatdata_tdgstctd64 *ctd64;
	uint64_t ebits, idxmask;
	uint32_t bitsperidx, nebits;
	int error, idx, is32bit, maxctds, remctds, tmperr;

	error = 0;

	/*
	 * Select the 32-bit or 64-bit variant. The pointers belonging to the
	 * unused variant are set to NULL.
	 */
	switch (vs_dtype) {
	case VSD_DTYPE_TDGSTCLUST32:
		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
		if (!ARB_FULL(ctd32tree))
			return (0);
		VSD(tdgstclust32, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd32tree);
		ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
		VSD(tdgstclust32, tdgst)->smplcnt = 0;
		is32bit = 1;
		ctd64tree = NULL;
		ctd64 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
#endif
		break;
	case VSD_DTYPE_TDGSTCLUST64:
		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
		if (!ARB_FULL(ctd64tree))
			return (0);
		VSD(tdgstclust64, tdgst)->compcnt++;
		maxctds = remctds = ARB_MAXNODES(ctd64tree);
		ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
		VSD(tdgstclust64, tdgst)->smplcnt = 0;
		is32bit = 0;
		ctd32tree = NULL;
		ctd32 = NULL;
#ifdef DIAGNOSTIC
		RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
#endif
		break;
	default:
		return (EINVAL);
	}

	/*
	 * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
	 * re-inserting the mu/cnt of each as a value and corresponding weight.
	 */

	/*
	 * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
	 * RAND_MAX happens to be approximately 31 bits (range [0,
	 * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
	 * the code is compiled in userspace, it gets the random(3) behavior,
	 * which has expected range [0, 0x7fffffff].
	 */
#define	bitsperrand 31
	/*
	 * ebits is a pool holding nebits not-yet-consumed random bits; each
	 * index draw consumes bitsperidx bits from its low end.
	 */
	ebits = 0;
	nebits = 0;
	bitsperidx = fls(maxctds);
	KASSERT(bitsperidx <= sizeof(ebits) << 3,
	    ("%s: bitsperidx=%d, ebits=%d",
	    __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
	idxmask = (UINT64_C(1) << bitsperidx) - 1;

	/* Initialise the free list with randomised centroid indices. */
	for (; remctds > 0; remctds--) {
		/* Top up the pool, capping the count at the pool's width. */
		while (nebits < bitsperidx) {
			ebits |= ((uint64_t)random()) << nebits;
			nebits += bitsperrand;
			if (nebits > (sizeof(ebits) << 3))
				nebits = sizeof(ebits) << 3;
		}
		idx = ebits & idxmask;
		nebits -= bitsperidx;
		ebits >>= bitsperidx;

		/*
		 * Select the next centroid to put on the ARB free list. We
		 * start with the centroid at our randomly selected array index,
		 * and work our way forwards until finding one (the latter
		 * aspect reduces re-insertion randomness, but is good enough).
		 */
		do {
			/* Wrap the index back into [0, maxctds). */
			if (idx >= maxctds)
				idx %= maxctds;

			if (is32bit)
				ctd32 = ARB_NODE(ctd32tree, idx);
			else
				ctd64 = ARB_NODE(ctd64tree, idx);
		} while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
		    ARB_ISFREE(ctd64, ctdlnk)) && ++idx);

		/* Put the centroid on the ARB free list. */
		if (is32bit)
			ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
		else
			ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
	}

	/*
	 * The free list now contains the randomised indices of every centroid.
	 * Walk the free list from start to end, re-inserting each centroid's
	 * mu/cnt. The tdgst_add() call may or may not consume the free centroid
	 * we re-insert values from during each loop iteration, so we must latch
	 * the index of the next free list centroid before the re-insertion
	 * call. The previous loop above should have left the centroid pointer
	 * pointing to the element at the head of the free list.
	 */
	KASSERT((is32bit ?
	    ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
	    ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
	    ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
	/* remctds counts re-insertions down; it must reach 0 (checked below). */
	remctds = maxctds;
	while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
		tmperr = 0;
		if (is32bit) {
			s64q_t x;

			idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
			/* Cloning a s32q_t into a s64q_t should never fail. */
			tmperr = Q_QCLONEQ(&x, ctd32->mu);
			/* Only re-add the centroid if the clone succeeded. */
			tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
			    vs_dtype, tdgst, x, ctd32->cnt, attempt);
			ctd32 = ARB_NODE(ctd32tree, idx);
			KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd32tree));
		} else {
			idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
			tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
			    ctd64->mu, ctd64->cnt, attempt);
			ctd64 = ARB_NODE(ctd64tree, idx);
			KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
			    ("%s: t-digest ARB@%p free list bug", __func__,
			    ctd64tree));
		}
		/*
		 * This process should not produce errors, bugs notwithstanding.
		 * Just in case, latch any errors and attempt all re-insertions.
		 */
		error = tmperr ? tmperr : error;
		remctds--;
	}

	KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
	    (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));

	return (error);
}
3066 
3067 static inline int
3068 stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
3069     s64q_t x, uint64_t weight, int attempt)
3070 {
3071 #ifdef DIAGNOSTIC
3072 	char qstr[Q_MAXSTRLEN(x, 10)];
3073 #endif
3074 	struct ctdth32 *ctd32tree;
3075 	struct ctdth64 *ctd64tree;
3076 	void *closest, *cur, *lb, *ub;
3077 	struct voistatdata_tdgstctd32 *ctd32;
3078 	struct voistatdata_tdgstctd64 *ctd64;
3079 	uint64_t cnt, smplcnt, sum, tmpsum;
3080 	s64q_t k, minz, q, z;
3081 	int error, is32bit, n;
3082 
3083 	error = 0;
3084 	minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
3085 
3086 	switch (vs_dtype) {
3087 	case VSD_DTYPE_TDGSTCLUST32:
3088 		if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
3089 			error = EOVERFLOW;
3090 		smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
3091 		ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
3092 		is32bit = 1;
3093 		ctd64tree = NULL;
3094 		ctd64 = NULL;
3095 		break;
3096 	case VSD_DTYPE_TDGSTCLUST64:
3097 		if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
3098 			error = EOVERFLOW;
3099 		smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
3100 		ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
3101 		is32bit = 0;
3102 		ctd32tree = NULL;
3103 		ctd32 = NULL;
3104 		break;
3105 	default:
3106 		error = EINVAL;
3107 		break;
3108 	}
3109 
3110 	if (error)
3111 		return (error);
3112 
3113 	/*
3114 	 * Inspired by Ted Dunning's AVLTreeDigest.java
3115 	 */
3116 	do {
3117 #if defined(DIAGNOSTIC)
3118 		KASSERT(attempt < 5,
3119 		    ("%s: Too many attempts", __func__));
3120 #endif
3121 		if (attempt >= 5)
3122 			return (EAGAIN);
3123 
3124 		Q_SIFVAL(minz, Q_IFMAXVAL(minz));
3125 		closest = ub = NULL;
3126 		sum = tmpsum = 0;
3127 
3128 		if (is32bit)
3129 			lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
3130 		else
3131 			lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
3132 
3133 		if (lb == NULL) /* Empty tree. */
3134 			lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
3135 			    (void *)ARB_ROOT(ctd64tree));
3136 
3137 		/*
3138 		 * Find the set of centroids with minimum distance to x and
3139 		 * compute the sum of counts for all centroids with mean less
3140 		 * than the first centroid in the set.
3141 		 */
3142 		for (; cur != NULL;
3143 		    cur = (is32bit ?
3144 		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3145 		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3146 			if (is32bit) {
3147 				cnt = ctd32->cnt;
3148 				KASSERT(Q_PRECEQ(ctd32->mu, x),
3149 				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3150 				    Q_RELPREC(ctd32->mu, x)));
3151 				/* Ok to assign as both have same precision. */
3152 				z = ctd32->mu;
3153 			} else {
3154 				cnt = ctd64->cnt;
3155 				KASSERT(Q_PRECEQ(ctd64->mu, x),
3156 				    ("%s: Q_RELPREC(mu,x)=%d", __func__,
3157 				    Q_RELPREC(ctd64->mu, x)));
3158 				/* Ok to assign as both have same precision. */
3159 				z = ctd64->mu;
3160 			}
3161 
3162 			error = Q_QSUBQ(&z, x);
3163 #if defined(DIAGNOSTIC)
3164 			KASSERT(!error, ("%s: unexpected error %d", __func__,
3165 			    error));
3166 #endif
3167 			if (error)
3168 				return (error);
3169 
3170 			z = Q_QABS(z);
3171 			if (Q_QLTQ(z, minz)) {
3172 				minz = z;
3173 				lb = cur;
3174 				sum = tmpsum;
3175 				tmpsum += cnt;
3176 			} else if (Q_QGTQ(z, minz)) {
3177 				ub = cur;
3178 				break;
3179 			}
3180 		}
3181 
3182 		cur = (is32bit ?
3183 		    (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
3184 		    (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
3185 
3186 		for (n = 0; cur != ub; cur = (is32bit ?
3187 		    (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3188 		    (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3189 			if (is32bit)
3190 				cnt = ctd32->cnt;
3191 			else
3192 				cnt = ctd64->cnt;
3193 
3194 			q = Q_CTRLINI(16);
3195 			if (smplcnt == 1)
3196 				error = Q_QFRACI(&q, 1, 2);
3197 			else
3198 				/* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
3199 				error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
3200 				    (smplcnt - 1) << 1);
3201 			k = q;
3202 			/* k = q x 4 x samplcnt x attempt */
3203 			error |= Q_QMULI(&k, 4 * smplcnt * attempt);
3204 			/* k = k x (1 - q) */
3205 			error |= Q_QSUBI(&q, 1);
3206 			q = Q_QABS(q);
3207 			error |= Q_QMULQ(&k, q);
3208 #if defined(DIAGNOSTIC)
3209 #if !defined(_KERNEL)
3210 			double q_dbl, k_dbl, q2d, k2d;
3211 			q2d = Q_Q2D(q);
3212 			k2d = Q_Q2D(k);
3213 			q_dbl = smplcnt == 1 ? 0.5 :
3214 			    (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
3215 			k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
3216 			/*
3217 			 * If the difference between q and q_dbl is greater than
3218 			 * the fractional precision of q, something is off.
3219 			 * NB: q is holding the value of 1 - q
3220 			 */
3221 			q_dbl = 1.0 - q_dbl;
3222 			KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
3223 			    (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
3224 			    ("Q-type q bad precision"));
3225 			KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
3226 			    1.0 + (0.01 * smplcnt),
3227 			    ("Q-type k bad precision"));
3228 #endif /* !_KERNEL */
3229 			KASSERT(!error, ("%s: unexpected error %d", __func__,
3230 			    error));
3231 #endif /* DIAGNOSTIC */
3232 			if (error)
3233 				return (error);
3234 			if ((is32bit && ((ctd32->cnt + weight) <=
3235 			    (uint64_t)Q_GIVAL(k))) ||
3236 			    (!is32bit && ((ctd64->cnt + weight) <=
3237 			    (uint64_t)Q_GIVAL(k)))) {
3238 				n++;
3239 				/* random() produces 31 bits. */
3240 				if (random() < (INT32_MAX / n))
3241 					closest = cur;
3242 			}
3243 			sum += cnt;
3244 		}
3245 	} while (closest == NULL &&
3246 	    (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
3247 	    (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
3248 	    attempt++)) == 0);
3249 
3250 	if (error)
3251 		return (error);
3252 
3253 	if (closest != NULL) {
3254 		/* Merge with an existing centroid. */
3255 		if (is32bit) {
3256 			ctd32 = (struct voistatdata_tdgstctd32 *)closest;
3257 			error = Q_QSUBQ(&x, ctd32->mu);
3258 			/*
3259 			 * The following calculation "x / (cnt + weight)"
3260 			 * computes the amount by which to adjust the centroid's
3261 			 * mu value in order to merge in the VOI sample.
3262 			 *
3263 			 * It can underflow (Q_QDIVI() returns ERANGE) when the
3264 			 * user centroids' fractional precision (which is
3265 			 * inherited by 'x') is too low to represent the result.
3266 			 *
3267 			 * A sophisticated approach to dealing with this issue
3268 			 * would minimise accumulation of error by tracking
3269 			 * underflow per centroid and making an adjustment when
3270 			 * a LSB's worth of underflow has accumulated.
3271 			 *
3272 			 * A simpler approach is to let the result underflow
3273 			 * i.e. merge the VOI sample into the centroid without
3274 			 * adjusting the centroid's mu, and rely on the user to
3275 			 * specify their t-digest with sufficient centroid
3276 			 * fractional precision such that the accumulation of
3277 			 * error from multiple underflows is of no material
3278 			 * consequence to the centroid's final value of mu.
3279 			 *
3280 			 * For the moment, the latter approach is employed by
3281 			 * simply ignoring ERANGE here.
3282 			 *
3283 			 * XXXLAS: Per-centroid underflow tracking is likely too
3284 			 * onerous, but it probably makes sense to accumulate a
3285 			 * single underflow error variable across all centroids
3286 			 * and report it as part of the digest to provide
3287 			 * additional visibility into the digest's fidelity.
3288 			 */
3289 			error = error ? error :
3290 			    Q_QDIVI(&x, ctd32->cnt + weight);
3291 			if ((error && error != ERANGE)
3292 			    || (error = Q_QADDQ(&ctd32->mu, x))) {
3293 #ifdef DIAGNOSTIC
3294 				KASSERT(!error, ("%s: unexpected error %d",
3295 				    __func__, error));
3296 #endif
3297 				return (error);
3298 			}
3299 			ctd32->cnt += weight;
3300 			error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
3301 			    NULL ? 0 : EALREADY;
3302 #ifdef DIAGNOSTIC
3303 			RB_REINSERT(rbctdth32,
3304 			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3305 #endif
3306 		} else {
3307 			ctd64 = (struct voistatdata_tdgstctd64 *)closest;
3308 			error = Q_QSUBQ(&x, ctd64->mu);
3309 			error = error ? error :
3310 			    Q_QDIVI(&x, ctd64->cnt + weight);
3311 			/* Refer to is32bit ERANGE discussion above. */
3312 			if ((error && error != ERANGE)
3313 			    || (error = Q_QADDQ(&ctd64->mu, x))) {
3314 				KASSERT(!error, ("%s: unexpected error %d",
3315 				    __func__, error));
3316 				return (error);
3317 			}
3318 			ctd64->cnt += weight;
3319 			error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
3320 			    NULL ? 0 : EALREADY;
3321 #ifdef DIAGNOSTIC
3322 			RB_REINSERT(rbctdth64,
3323 			    &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
3324 #endif
3325 		}
3326 	} else {
3327 		/*
3328 		 * Add a new centroid. If digest compression is working
3329 		 * correctly, there should always be at least one free.
3330 		 */
3331 		if (is32bit) {
3332 			ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
3333 #ifdef DIAGNOSTIC
3334 			KASSERT(ctd32 != NULL,
3335 			    ("%s: t-digest@%p has no free centroids",
3336 			    __func__, tdgst));
3337 #endif
3338 			if (ctd32 == NULL)
3339 				return (EAGAIN);
3340 			if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
3341 				return (error);
3342 			ctd32->cnt = weight;
3343 			error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
3344 			    0 : EALREADY;
3345 #ifdef DIAGNOSTIC
3346 			RB_INSERT(rbctdth32,
3347 			    &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3348 #endif
3349 		} else {
3350 			ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
3351 #ifdef DIAGNOSTIC
3352 			KASSERT(ctd64 != NULL,
3353 			    ("%s: t-digest@%p has no free centroids",
3354 			    __func__, tdgst));
3355 #endif
3356 			if (ctd64 == NULL) /* Should not happen. */
3357 				return (EAGAIN);
3358 			/* Direct assignment ok as both have same type/prec. */
3359 			ctd64->mu = x;
3360 			ctd64->cnt = weight;
3361 			error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
3362 			    0 : EALREADY;
3363 #ifdef DIAGNOSTIC
3364 			RB_INSERT(rbctdth64, &VSD(tdgstclust64,
3365 			    tdgst)->rbctdtree, ctd64);
3366 #endif
3367 		}
3368 	}
3369 
3370 	if (is32bit)
3371 		VSD(tdgstclust32, tdgst)->smplcnt += weight;
3372 	else {
3373 		VSD(tdgstclust64, tdgst)->smplcnt += weight;
3374 
3375 #ifdef DIAGNOSTIC
3376 		struct rbctdth64 *rbctdtree =
3377 		    &VSD(tdgstclust64, tdgst)->rbctdtree;
3378 		struct voistatdata_tdgstctd64 *rbctd64;
3379 		int i = 0;
3380 		ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
3381 			rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
3382 			    RB_NEXT(rbctdth64, rbctdtree, rbctd64));
3383 
3384 			if (i >= ARB_CURNODES(ctd64tree)
3385 			    || ctd64 != rbctd64
3386 			    || ARB_MIN(ctdth64, ctd64tree) !=
3387 			       RB_MIN(rbctdth64, rbctdtree)
3388 			    || ARB_MAX(ctdth64, ctd64tree) !=
3389 			       RB_MAX(rbctdth64, rbctdtree)
3390 			    || ARB_LEFTIDX(ctd64, ctdlnk) !=
3391 			       ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
3392 			    || ARB_RIGHTIDX(ctd64, ctdlnk) !=
3393 			       ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
3394 			    || ARB_PARENTIDX(ctd64, ctdlnk) !=
3395 			       ARB_SELFIDX(ctd64tree,
3396 			       RB_PARENT(rbctd64, rblnk))) {
3397 				Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
3398 				printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3399 				    "mu=%s\n",
3400 				    (int)ARB_SELFIDX(ctd64tree, ctd64),
3401 				    ARB_PARENTIDX(ctd64, ctdlnk),
3402 				    ARB_LEFTIDX(ctd64, ctdlnk),
3403 				    ARB_RIGHTIDX(ctd64, ctdlnk),
3404 				    ARB_COLOR(ctd64, ctdlnk),
3405 				    qstr);
3406 
3407 				Q_TOSTR(rbctd64->mu, -1, 10, qstr,
3408 				    sizeof(qstr));
3409 				printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3410 				    "mu=%s\n",
3411 				    (int)ARB_SELFIDX(ctd64tree, rbctd64),
3412 				    (int)ARB_SELFIDX(ctd64tree,
3413 				      RB_PARENT(rbctd64, rblnk)),
3414 				    (int)ARB_SELFIDX(ctd64tree,
3415 				      RB_LEFT(rbctd64, rblnk)),
3416 				    (int)ARB_SELFIDX(ctd64tree,
3417 				      RB_RIGHT(rbctd64, rblnk)),
3418 				    RB_COLOR(rbctd64, rblnk),
3419 				    qstr);
3420 
3421 				panic("RB@%p and ARB@%p trees differ\n",
3422 				    rbctdtree, ctd64tree);
3423 			}
3424 			i++;
3425 		}
3426 #endif /* DIAGNOSTIC */
3427 	}
3428 
3429 	return (error);
3430 }
3431 
3432 static inline int
3433 stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
3434     struct voistat *vs, struct voistatdata_tdgst *tdgst)
3435 {
3436 	s64q_t x;
3437 	int error;
3438 
3439 	error = 0;
3440 
3441 	switch (vs->dtype) {
3442 	case VSD_DTYPE_TDGSTCLUST32:
3443 		/* Use same precision as the user's centroids. */
3444 		Q_INI(&x, 0, 0, Q_NFBITS(
3445 		    ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
3446 		break;
3447 	case VSD_DTYPE_TDGSTCLUST64:
3448 		/* Use same precision as the user's centroids. */
3449 		Q_INI(&x, 0, 0, Q_NFBITS(
3450 		    ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
3451 		break;
3452 	default:
3453 		KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
3454 		    vs->dtype == VSD_DTYPE_TDGSTCLUST64,
3455 		    ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
3456 		    __func__, vs->dtype));
3457 		return (EINVAL);
3458 	}
3459 
3460 	/*
3461 	 * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
3462 	 * returning EOVERFLOW if the voival would have fit in a u64q_t.
3463 	 */
3464 	switch (voi_dtype) {
3465 	case VSD_DTYPE_INT_S32:
3466 		error = Q_QCPYVALI(&x, voival->int32.s32);
3467 		break;
3468 	case VSD_DTYPE_INT_U32:
3469 		error = Q_QCPYVALI(&x, voival->int32.u32);
3470 		break;
3471 	case VSD_DTYPE_INT_S64:
3472 		error = Q_QCPYVALI(&x, voival->int64.s64);
3473 		break;
3474 	case VSD_DTYPE_INT_U64:
3475 		error = Q_QCPYVALI(&x, voival->int64.u64);
3476 		break;
3477 	case VSD_DTYPE_INT_SLONG:
3478 		error = Q_QCPYVALI(&x, voival->intlong.slong);
3479 		break;
3480 	case VSD_DTYPE_INT_ULONG:
3481 		error = Q_QCPYVALI(&x, voival->intlong.ulong);
3482 		break;
3483 	case VSD_DTYPE_Q_S32:
3484 		error = Q_QCPYVALQ(&x, voival->q32.sq32);
3485 		break;
3486 	case VSD_DTYPE_Q_U32:
3487 		error = Q_QCPYVALQ(&x, voival->q32.uq32);
3488 		break;
3489 	case VSD_DTYPE_Q_S64:
3490 		error = Q_QCPYVALQ(&x, voival->q64.sq64);
3491 		break;
3492 	case VSD_DTYPE_Q_U64:
3493 		error = Q_QCPYVALQ(&x, voival->q64.uq64);
3494 		break;
3495 	default:
3496 		error = EINVAL;
3497 		break;
3498 	}
3499 
3500 	if (error ||
3501 	    (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
3502 		return (error);
3503 
3504 	vs->flags |= VS_VSDVALID;
3505 	return (0);
3506 }
3507 
3508 int
3509 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3510     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3511 {
3512 	struct voi *v;
3513 	struct voistat *vs;
3514 	void *statevsd, *vsd;
3515 	int error, i, tmperr;
3516 
3517 	error = 0;
3518 
3519 	if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
3520 	    voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3521 		return (EINVAL);
3522 	v = &sb->vois[voi_id];
3523 	if (voi_dtype != v->dtype || v->id < 0 ||
3524 	    ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3525 		return (EINVAL);
3526 
3527 	vs = BLOB_OFFSET(sb, v->stats_off);
3528 	if (v->flags & VOI_REQSTATE)
3529 		statevsd = BLOB_OFFSET(sb, vs->data_off);
3530 	else
3531 		statevsd = NULL;
3532 
3533 	if (flags & SB_VOI_RELUPDATE) {
3534 		switch (voi_dtype) {
3535 		case VSD_DTYPE_INT_S32:
3536 			voival->int32.s32 +=
3537 			    VSD(voistate, statevsd)->prev.int32.s32;
3538 			break;
3539 		case VSD_DTYPE_INT_U32:
3540 			voival->int32.u32 +=
3541 			    VSD(voistate, statevsd)->prev.int32.u32;
3542 			break;
3543 		case VSD_DTYPE_INT_S64:
3544 			voival->int64.s64 +=
3545 			    VSD(voistate, statevsd)->prev.int64.s64;
3546 			break;
3547 		case VSD_DTYPE_INT_U64:
3548 			voival->int64.u64 +=
3549 			    VSD(voistate, statevsd)->prev.int64.u64;
3550 			break;
3551 		case VSD_DTYPE_INT_SLONG:
3552 			voival->intlong.slong +=
3553 			    VSD(voistate, statevsd)->prev.intlong.slong;
3554 			break;
3555 		case VSD_DTYPE_INT_ULONG:
3556 			voival->intlong.ulong +=
3557 			    VSD(voistate, statevsd)->prev.intlong.ulong;
3558 			break;
3559 		case VSD_DTYPE_Q_S32:
3560 			error = Q_QADDQ(&voival->q32.sq32,
3561 			    VSD(voistate, statevsd)->prev.q32.sq32);
3562 			break;
3563 		case VSD_DTYPE_Q_U32:
3564 			error = Q_QADDQ(&voival->q32.uq32,
3565 			    VSD(voistate, statevsd)->prev.q32.uq32);
3566 			break;
3567 		case VSD_DTYPE_Q_S64:
3568 			error = Q_QADDQ(&voival->q64.sq64,
3569 			    VSD(voistate, statevsd)->prev.q64.sq64);
3570 			break;
3571 		case VSD_DTYPE_Q_U64:
3572 			error = Q_QADDQ(&voival->q64.uq64,
3573 			    VSD(voistate, statevsd)->prev.q64.uq64);
3574 			break;
3575 		default:
3576 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3577 			break;
3578 		}
3579 	}
3580 
3581 	if (error)
3582 		return (error);
3583 
3584 	for (i = v->voistatmaxid; i > 0; i--) {
3585 		vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3586 		if (vs->stype < 0)
3587 			continue;
3588 
3589 		vsd = BLOB_OFFSET(sb, vs->data_off);
3590 
3591 		switch (vs->stype) {
3592 		case VS_STYPE_MAX:
3593 			tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3594 			    vs, vsd);
3595 			break;
3596 		case VS_STYPE_MIN:
3597 			tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3598 			    vs, vsd);
3599 			break;
3600 		case VS_STYPE_SUM:
3601 			tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3602 			    vs, vsd);
3603 			break;
3604 		case VS_STYPE_HIST:
3605 			tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3606 			    vs, vsd);
3607 			break;
3608 		case VS_STYPE_TDGST:
3609 			tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3610 			    vs, vsd);
3611 			break;
3612 		default:
3613 			KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
3614 			break;
3615 		}
3616 
3617 		if (tmperr) {
3618 			error = tmperr;
3619 			VS_INCERRS(vs);
3620 		}
3621 	}
3622 
3623 	if (statevsd) {
3624 		switch (voi_dtype) {
3625 		case VSD_DTYPE_INT_S32:
3626 			VSD(voistate, statevsd)->prev.int32.s32 =
3627 			    voival->int32.s32;
3628 			break;
3629 		case VSD_DTYPE_INT_U32:
3630 			VSD(voistate, statevsd)->prev.int32.u32 =
3631 			    voival->int32.u32;
3632 			break;
3633 		case VSD_DTYPE_INT_S64:
3634 			VSD(voistate, statevsd)->prev.int64.s64 =
3635 			    voival->int64.s64;
3636 			break;
3637 		case VSD_DTYPE_INT_U64:
3638 			VSD(voistate, statevsd)->prev.int64.u64 =
3639 			    voival->int64.u64;
3640 			break;
3641 		case VSD_DTYPE_INT_SLONG:
3642 			VSD(voistate, statevsd)->prev.intlong.slong =
3643 			    voival->intlong.slong;
3644 			break;
3645 		case VSD_DTYPE_INT_ULONG:
3646 			VSD(voistate, statevsd)->prev.intlong.ulong =
3647 			    voival->intlong.ulong;
3648 			break;
3649 		case VSD_DTYPE_Q_S32:
3650 			error = Q_QCPYVALQ(
3651 			    &VSD(voistate, statevsd)->prev.q32.sq32,
3652 			    voival->q32.sq32);
3653 			break;
3654 		case VSD_DTYPE_Q_U32:
3655 			error = Q_QCPYVALQ(
3656 			    &VSD(voistate, statevsd)->prev.q32.uq32,
3657 			    voival->q32.uq32);
3658 			break;
3659 		case VSD_DTYPE_Q_S64:
3660 			error = Q_QCPYVALQ(
3661 			    &VSD(voistate, statevsd)->prev.q64.sq64,
3662 			    voival->q64.sq64);
3663 			break;
3664 		case VSD_DTYPE_Q_U64:
3665 			error = Q_QCPYVALQ(
3666 			    &VSD(voistate, statevsd)->prev.q64.uq64,
3667 			    voival->q64.uq64);
3668 			break;
3669 		default:
3670 			KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3671 			break;
3672 		}
3673 	}
3674 
3675 	return (error);
3676 }
3677 
3678 #ifdef _KERNEL
3679 
/*
 * SYSINIT hook for the stats subsystem. The body is intentionally empty:
 * nothing is initialised here at present. 'arg' is unused (the SYSINIT
 * registration below passes NULL).
 */
static void
stats_init(void *arg)
{

}
SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3686 
3687 /*
3688  * Sysctl handler to display the list of available stats templates.
3689  */
3690 static int
3691 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3692 {
3693 	struct sbuf *s;
3694 	int err, i;
3695 
3696 	err = 0;
3697 
3698 	/* We can tolerate ntpl being stale, so do not take the lock. */
3699 	s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3700 	    ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3701 	if (s == NULL)
3702 		return (ENOMEM);
3703 
3704 	TPL_LIST_RLOCK();
3705 	for (i = 0; i < ntpl; i++) {
3706 		err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3707 		    tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3708 		if (err) {
3709 			/* Sbuf overflow condition. */
3710 			err = EOVERFLOW;
3711 			break;
3712 		}
3713 	}
3714 	TPL_LIST_RUNLOCK();
3715 
3716 	if (!err) {
3717 		sbuf_finish(s);
3718 		err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3719 	}
3720 
3721 	sbuf_delete(s);
3722 	return (err);
3723 }
3724 
3725 /*
3726  * Called by subsystem-specific sysctls to report and/or parse the list of
3727  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3728  * conformant function pointer must be passed in as arg1, which is used to
3729  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3730  * a zero-initialised allocation of arg2-sized contextual memory is
3731  * heap-allocated and passed in to all subsystem callbacks made during the
3732  * operation of stats_tpl_sample_rates().
3733  *
3734  * XXXLAS: Assumes templates are never removed, which is currently true but may
3735  * need to be reworked in future if dynamic template management becomes a
3736  * requirement e.g. to support kernel module based templates.
3737  */
3738 int
3739 stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
3740 {
3741 	char kvpair_fmt[16], tplspec_fmt[16];
3742 	char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
3743 	char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
3744 	stats_tpl_sr_cb_t subsys_cb;
3745 	void *subsys_ctx;
3746 	char *buf, *new_rates_usr_str, *tpl_name_p;
3747 	struct stats_tpl_sample_rate *rates;
3748 	struct sbuf *s, _s;
3749 	uint32_t cum_pct, pct, tpl_hash;
3750 	int err, i, off, len, newlen, nrates;
3751 
3752 	buf = NULL;
3753 	rates = NULL;
3754 	err = nrates = 0;
3755 	subsys_cb = (stats_tpl_sr_cb_t)arg1;
3756 	KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
3757 	if (arg2 > 0)
3758 		subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
3759 	else
3760 		subsys_ctx = NULL;
3761 
3762 	/* Grab current count of subsystem rates. */
3763 	err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
3764 	if (err)
3765 		goto done;
3766 
3767 	/* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
3768 	len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
3769 
3770 	if (req->oldptr != NULL || req->newptr != NULL)
3771 		buf = malloc(len, M_TEMP, M_WAITOK);
3772 
3773 	if (req->oldptr != NULL) {
3774 		if (nrates == 0) {
3775 			/* No rates, so return an empty string via oldptr. */
3776 			err = SYSCTL_OUT(req, "", 1);
3777 			if (err)
3778 				goto done;
3779 			goto process_new;
3780 		}
3781 
3782 		s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3783 
3784 		/* Grab locked count of, and ptr to, subsystem rates. */
3785 		err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
3786 		    subsys_ctx);
3787 		if (err)
3788 			goto done;
3789 		TPL_LIST_RLOCK();
3790 		for (i = 0; i < nrates && !err; i++) {
3791 			err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
3792 			    tpllist[rates[i].tpl_slot_id]->mb->tplname,
3793 			    tpllist[rates[i].tpl_slot_id]->mb->tplhash,
3794 			    rates[i].tpl_sample_pct);
3795 		}
3796 		TPL_LIST_RUNLOCK();
3797 		/* Tell subsystem that we're done with its rates list. */
3798 		err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
3799 		if (err)
3800 			goto done;
3801 
3802 		err = sbuf_finish(s);
3803 		if (err)
3804 			goto done; /* We lost a race for buf to be too small. */
3805 
3806 		/* Return the rendered string data via oldptr. */
3807 		err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
3808 	} else {
3809 		/* Return the upper bound size for buffer sizing requests. */
3810 		err = SYSCTL_OUT(req, NULL, len);
3811 	}
3812 
3813 process_new:
3814 	if (err || req->newptr == NULL)
3815 		goto done;
3816 
3817 	newlen = req->newlen - req->newidx;
3818 	err = SYSCTL_IN(req, buf, newlen);
3819 	if (err)
3820 		goto done;
3821 
3822 	/*
3823 	 * Initialise format strings at run time.
3824 	 *
3825 	 * Write the max template spec string length into the
3826 	 * template_spec=percent key-value pair parsing format string as:
3827 	 *     " %<width>[^=]=%u %n"
3828 	 *
3829 	 * Write the max template name string length into the tplname:tplhash
3830 	 * parsing format string as:
3831 	 *     "%<width>[^:]:%u"
3832 	 *
3833 	 * Subtract 1 for \0 appended by sscanf().
3834 	 */
3835 	sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
3836 	sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
3837 
3838 	/*
3839 	 * Parse each CSV key-value pair specifying a template and its sample
3840 	 * percentage. Whitespace either side of a key-value pair is ignored.
3841 	 * Templates can be specified by name, hash, or name and hash per the
3842 	 * following formats (chars in [] are optional):
3843 	 *    ["]<tplname>["]=<percent>
3844 	 *    :hash=pct
3845 	 *    ["]<tplname>["]:hash=<percent>
3846 	 */
3847 	cum_pct = nrates = 0;
3848 	rates = NULL;
3849 	buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
3850 	new_rates_usr_str = buf;
3851 	while (isspace(*new_rates_usr_str))
3852 		new_rates_usr_str++; /* Skip leading whitespace. */
3853 	while (*new_rates_usr_str != '\0') {
3854 		tpl_name_p = tpl_name;
3855 		tpl_name[0] = '\0';
3856 		tpl_hash = 0;
3857 		off = 0;
3858 
3859 		/*
3860 		 * Parse key-value pair which must perform 2 conversions, then
3861 		 * parse the template spec to extract either name, hash, or name
3862 		 * and hash depending on the three possible spec formats. The
3863 		 * tplspec_fmt format specifier parses name or name and hash
3864 		 * template specs, while the ":%u" format specifier parses
3865 		 * hash-only template specs. If parsing is successfull, ensure
3866 		 * the cumulative sampling percentage does not exceed 100.
3867 		 */
3868 		err = EINVAL;
3869 		if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
3870 		    &off))
3871 			break;
3872 		if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
3873 		    (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
3874 			break;
3875 		if ((cum_pct += pct) > 100)
3876 			break;
3877 		err = 0;
3878 
3879 		/* Strip surrounding "" from template name if present. */
3880 		len = strlen(tpl_name);
3881 		if (len > 0) {
3882 			if (tpl_name[len - 1] == '"')
3883 				tpl_name[--len] = '\0';
3884 			if (tpl_name[0] == '"') {
3885 				tpl_name_p++;
3886 				len--;
3887 			}
3888 		}
3889 
3890 		rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
3891 		    (nrates + 1) * sizeof(*rates), M_WAITOK);
3892 		rates[nrates].tpl_slot_id =
3893 		    stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
3894 		if (rates[nrates].tpl_slot_id < 0) {
3895 			err = -rates[nrates].tpl_slot_id;
3896 			break;
3897 		}
3898 		rates[nrates].tpl_sample_pct = pct;
3899 		nrates++;
3900 		new_rates_usr_str += off;
3901 		if (*new_rates_usr_str != ',')
3902 			break; /* End-of-input or malformed. */
3903 		new_rates_usr_str++; /* Move past comma to next pair. */
3904 	}
3905 
3906 	if (!err) {
3907 		if ((new_rates_usr_str - buf) < newlen) {
3908 			/* Entire input has not been consumed. */
3909 			err = EINVAL;
3910 		} else {
3911 			/*
3912 			 * Give subsystem the new rates. They'll return the
3913 			 * appropriate rates pointer for us to garbage collect.
3914 			 */
3915 			err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
3916 			    subsys_ctx);
3917 		}
3918 	}
3919 	stats_free(rates);
3920 
3921 done:
3922 	free(buf, M_TEMP);
3923 	free(subsys_ctx, M_TEMP);
3924 	return (err);
3925 }
3926 
/* Parent sysctl node "stats", declared under _kern, for the stats(9) MIB. */
SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
    "stats(9) MIB");

/*
 * Read-only string sysctl "templates" under the _kern_stats node, serviced by
 * stats_tpl_list_available().
 */
SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    stats_tpl_list_available, "A",
    "list the name/hash of all available stats(9) templates");
3934 
3935 #else /* ! _KERNEL */
3936 
/*
 * Userland only: registered as a constructor so that the template list
 * rwlock is initialised (with default attributes) when the code is loaded.
 * The return value of pthread_rwlock_init() is not checked.
 */
static void __attribute__ ((constructor))
stats_constructor(void)
{

	pthread_rwlock_init(&tpllistlock, NULL);
}
3943 
/*
 * Userland only: registered as a destructor to tear down the template list
 * rwlock set up by stats_constructor(). The return value of
 * pthread_rwlock_destroy() is not checked.
 */
static void __attribute__ ((destructor))
stats_destructor(void)
{

	pthread_rwlock_destroy(&tpllistlock);
}
3950 
3951 #endif /* _KERNEL */
3952