xref: /illumos-gate/usr/src/test/os-tests/tests/xsave/xsave_util.c (revision 9b9d39d2a32ff806d2431dbcc50968ef1e6d46b2)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2023 Oxide Computer Company
14  */
15 
16 /*
17  * This file implements various utility functions we use for the xsave tests.
18  */
19 
20 #include <string.h>
21 #include <strings.h>
22 #include <sys/auxv.h>
23 #include <sys/sysmacros.h>
24 #include <err.h>
25 #include <stdlib.h>
26 #include <procfs.h>
27 #include <sys/x86_archext.h>
28 #include <unistd.h>
29 #include <errno.h>
30 #include <sys/types.h>
31 #include <sys/wait.h>
32 #include <sys/debug.h>
33 #include <ieeefp.h>
34 
35 #include "xsave_util.h"
36 
/*
 * How long xsu_proc_bkpt() is willing to wait in Pwait() for the target process
 * to stop, expressed in milliseconds.
 */
static uint_t xsu_proc_timeout = 60 * 1000; /* 60s in ms */
38 
39 /*
40  * Determine if we have the hardware support required for a given level of
41  * hardware support.
42  */
43 uint32_t
44 xsu_hwsupport(void)
45 {
46 	uint_t isa[3];
47 	uint_t nisa = getisax(isa, ARRAY_SIZE(isa));
48 
49 	if (nisa != ARRAY_SIZE(isa)) {
50 		errx(EXIT_FAILURE, "did not get all %zu hwcap values, found %u",
51 		    ARRAY_SIZE(isa), nisa);
52 	}
53 
54 	if ((isa[0] & AV_386_XSAVE) == 0) {
55 		errx(EXIT_FAILURE, "xsave not present: this test should have "
56 		    "been skipped");
57 	}
58 
59 	if ((isa[1] & AV_386_2_AVX512F) != 0) {
60 		warnx("found %%zmm support");
61 		return (XSU_ZMM);
62 	}
63 
64 	if ((isa[0] & AV_386_AVX) != 0) {
65 		warnx("found %%ymm support");
66 		return (XSU_YMM);
67 	}
68 
69 	errx(EXIT_FAILURE, "no non-XMM xsave state found: this test should "
70 	    "have been skipped");
71 }
72 
73 /*
74  * Fill all the valid regions of an FPU based on treating the vector register as
75  * a series of uint32_t values and going from there.
76  */
77 void
78 xsu_fill(xsu_fpu_t *fpu, uint32_t level, uint32_t start)
79 {
80 	(void) memset(fpu, 0, sizeof (xsu_fpu_t));
81 
82 	switch (level) {
83 	default:
84 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", level);
85 	case XSU_YMM:
86 		for (uint32_t regno = 0; regno < XSU_MAX_YMM; regno++) {
87 			for (uint32_t u32 = 0; u32 < XSU_YMM_U32; u32++,
88 			    start++) {
89 				fpu->xf_reg[regno]._l[u32] = start;
90 			}
91 		}
92 		break;
93 	case XSU_ZMM:
94 		for (uint32_t regno = 0; regno < XSU_MAX_ZMM; regno++) {
95 			for (uint32_t u32 = 0; u32 < XSU_ZMM_U32; u32++,
96 			    start++) {
97 				fpu->xf_reg[regno]._l[u32] = start;
98 			}
99 		}
100 		for (uint32_t regno = 0; regno < ARRAY_SIZE(fpu->xf_opmask);
101 		    regno++) {
102 			uint64_t val = start | (((uint64_t)start + 1) << 32);
103 			fpu->xf_opmask[regno] = val;
104 			start += 2;
105 		}
106 		break;
107 	}
108 }
109 
110 static void
111 xsu_overwrite_uctx_xmm(ucontext_t *uctx, const xsu_fpu_t *fpu)
112 {
113 	struct _fpchip_state *fp;
114 
115 	fp = &uctx->uc_mcontext.fpregs.fp_reg_set.fpchip_state;
116 	for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
117 		(void) memcpy(&fp->xmm[i], &fpu->xf_reg[i]._l[0],
118 		    XSU_XMM_U32 * sizeof (uint32_t));
119 	}
120 }
121 
122 static void
123 xsu_overwrite_uctx_ymm(uintptr_t arg, const xsu_fpu_t *fpu)
124 {
125 	prxregset_ymm_t *ymm = (void *)arg;
126 
127 	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
128 		(void) memcpy(&ymm->prx_ymm[i]._l[0],
129 		    &fpu->xf_reg[i]._l[XSU_XMM_U32],
130 		    XSU_XMM_U32 * sizeof (uint32_t));
131 	}
132 }
133 
134 static void
135 xsu_overwrite_uctx_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
136 {
137 	prxregset_zmm_t *zmm = (void *)arg;
138 
139 	/*
140 	 * Because this is the low zmm registers, we actually use the max ymm
141 	 * value as that's what actually fits in the low zmm and not the full
142 	 * definition.
143 	 */
144 	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
145 		(void) memcpy(&zmm->prx_zmm[i]._l[0],
146 		    &fpu->xf_reg[i]._l[XSU_YMM_U32],
147 		    XSU_YMM_U32 * sizeof (uint32_t));
148 	}
149 }
150 
151 static void
152 xsu_overwrite_uctx_hi_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
153 {
154 #ifdef __amd64
155 	prxregset_hi_zmm_t *zmm = (void *)arg;
156 
157 	for (uint32_t i = XSU_MAX_YMM; i < XSU_MAX_ZMM; i++) {
158 		(void) memcpy(&zmm->prx_hi_zmm[i - XSU_MAX_YMM]._l[0],
159 		    &fpu->xf_reg[i]._l[0],
160 		    XSU_ZMM_U32 * sizeof (uint32_t));
161 	}
162 #else	/* !__amd64 */
163 	warnx("attempted to set High ZMM registers on a 32-bit process!");
164 	abort();
165 #endif	/* __amd64 */
166 }
167 
168 void
169 xsu_overwrite_uctx(ucontext_t *uctx, const xsu_fpu_t *fpu, uint32_t hwsup)
170 {
171 	size_t xsave_size = sizeof (uc_xsave_t);
172 	void *new_buf;
173 	uc_xsave_t *ucs;
174 	uintptr_t write_ptr;
175 
176 	if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
177 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
178 	}
179 
180 	if (hwsup >= XSU_YMM) {
181 		xsave_size += sizeof (prxregset_ymm_t);
182 	}
183 
184 	if (hwsup >= XSU_ZMM) {
185 		xsave_size += sizeof (prxregset_zmm_t);
186 		xsave_size += sizeof (prxregset_opmask_t);
187 		if (XSU_MAX_ZMM > 16) {
188 			xsave_size += sizeof (prxregset_hi_zmm_t);
189 		}
190 	}
191 
192 	new_buf = calloc(1, xsave_size);
193 	if (new_buf == NULL) {
194 		errx(EXIT_FAILURE, "failed to allocate xsave buf");
195 	}
196 	ucs = new_buf;
197 	ucs->ucx_vers = UC_XSAVE_VERS;
198 	ucs->ucx_len = xsave_size;
199 	if (hwsup >= XSU_YMM) {
200 		ucs->ucx_bv |= XFEATURE_AVX;
201 	}
202 
203 	if (hwsup >= XSU_ZMM) {
204 		ucs->ucx_bv |= XFEATURE_AVX512_OPMASK | XFEATURE_AVX512_ZMM;
205 		if (XSU_MAX_ZMM > 16)
206 			ucs->ucx_bv |= XFEATURE_AVX512_HI_ZMM;
207 	}
208 
209 	/*
210 	 * At this point we have rigged things up. XMM values are in the
211 	 * ucontext_t itself. After that we must write things out in the kernel
212 	 * signal order. Note, the XMM state is not set in the bit-vector
213 	 * because well, we don't actually use the xsave pieces for it because o
214 	 * the ucontext_t ABI has the xmm state always there. See
215 	 * uts/intel/os/fpu.c's big theory statement for more info.
216 	 */
217 	xsu_overwrite_uctx_xmm(uctx, fpu);
218 	write_ptr = (uintptr_t)new_buf + sizeof (uc_xsave_t);
219 	if (hwsup >= XSU_YMM) {
220 		xsu_overwrite_uctx_ymm(write_ptr, fpu);
221 		write_ptr += sizeof (prxregset_ymm_t);
222 	}
223 
224 	if (hwsup >= XSU_ZMM) {
225 		(void) memcpy((void *)write_ptr, fpu->xf_opmask,
226 		    sizeof (fpu->xf_opmask));
227 		write_ptr += sizeof (fpu->xf_opmask);
228 		xsu_overwrite_uctx_zmm(write_ptr, fpu);
229 		write_ptr += sizeof (prxregset_zmm_t);
230 		if (XSU_MAX_ZMM > 16) {
231 			xsu_overwrite_uctx_hi_zmm(write_ptr, fpu);
232 			write_ptr += sizeof (prxregset_hi_zmm_t);
233 		}
234 	}
235 
236 	uctx->uc_xsave = (long)(uintptr_t)new_buf;
237 }
238 
239 static boolean_t
240 xsu_check_vector(const upad512_t *src, const upad512_t *chk, uint32_t regno,
241     uint32_t nu32)
242 {
243 	boolean_t valid = B_TRUE;
244 
245 	for (uint32_t i = 0; i < nu32; i++) {
246 		if (src->_l[i] != chk->_l[i]) {
247 			warnx("vec[%u] u32 %u differs: expected 0x%x, "
248 			    "found 0x%x", regno, i, src->_l[i], chk->_l[i]);
249 			valid = B_FALSE;
250 		}
251 	}
252 
253 	return (valid);
254 }
255 
256 boolean_t
257 xsu_same(const xsu_fpu_t *src, const xsu_fpu_t *check, uint32_t hwsup)
258 {
259 	boolean_t valid = B_TRUE;
260 
261 	switch (hwsup) {
262 	default:
263 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
264 	case XSU_YMM:
265 		for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
266 			if (!xsu_check_vector(&src->xf_reg[i],
267 			    &check->xf_reg[i], i, XSU_YMM_U32)) {
268 				valid = B_FALSE;
269 			}
270 		}
271 		break;
272 	case XSU_ZMM:
273 		for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
274 			if (!xsu_check_vector(&src->xf_reg[i],
275 			    &check->xf_reg[i], i, XSU_ZMM_U32)) {
276 				valid = B_FALSE;
277 			}
278 		}
279 		for (uint32_t i = 0; i < ARRAY_SIZE(src->xf_opmask); i++) {
280 			if (src->xf_opmask[i] != check->xf_opmask[i]) {
281 				warnx("mask[%u] differs: expected 0x%" PRIx64
282 				    ", found 0x%" PRIx64, i, src->xf_opmask[i],
283 				    check->xf_opmask[i]);
284 				valid = B_FALSE;
285 			}
286 		}
287 		break;
288 	}
289 	return (valid);
290 }
291 
292 
293 void *
294 xsu_sleeper_thread(void *arg __unused)
295 {
296 	for (;;) {
297 		(void) sleep(100);
298 	}
299 	return (NULL);
300 }
301 
302 static void
303 xsu_dump_vector(FILE *f, const upad512_t *reg, uint32_t nu32, const char *name,
304     uint32_t idx)
305 {
306 	VERIFY3U(nu32 % 4, ==, 0);
307 	for (uint32_t i = 0; i < nu32; i += 4) {
308 		(void) fprintf(f, "%s[%02u] [%02u:%02u] = { 0x%08x 0x%08x "
309 		    "0x%08x 0x%08x }\n", name, idx, i + 3, i,  reg->_l[i + 3],
310 		    reg->_l[i + 2], reg->_l[i + 1], reg->_l[i]);
311 	}
312 }
313 
314 void
315 xsu_dump(FILE *f, const xsu_fpu_t *fpu, uint32_t hwsup)
316 {
317 
318 	switch (hwsup) {
319 	default:
320 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
321 	case XSU_YMM:
322 		for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
323 			xsu_dump_vector(f, &fpu->xf_reg[i], XSU_YMM_U32,
324 			    "ymm", i);
325 		}
326 		break;
327 	case XSU_ZMM:
328 		for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
329 			xsu_dump_vector(f, &fpu->xf_reg[i], XSU_ZMM_U32,
330 			    "zmm", i);
331 		}
332 
333 		for (uint32_t i = 0; i < ARRAY_SIZE(fpu->xf_opmask); i++) {
334 			(void) fprintf(f, "%%k%u 0x%016" PRIx64"\n", i,
335 			    fpu->xf_opmask[i]);
336 		}
337 		break;
338 	}
339 }
340 
/*
 * Bookkeeping used while xsu_fpu_to_xregs() assembles a prxregset_t. Each
 * pointer refers to that component's data inside the buffer being built and is
 * left NULL when the component is not included.
 */
typedef struct xsu_prx {
	uint32_t xp_hwsup;		/* XSU_* level being generated */
	prxregset_xsave_t *xp_xsave;	/* PRX_INFO_XSAVE data */
	prxregset_ymm_t *xp_ymm;	/* PRX_INFO_YMM data */
	prxregset_opmask_t *xp_opmask;	/* PRX_INFO_OPMASK data */
	prxregset_zmm_t *xp_zmm;	/* PRX_INFO_ZMM data */
	prxregset_hi_zmm_t *xp_hi_zmm;	/* PRX_INFO_HI_ZMM data */
} xsu_prx_t;
349 
350 static void
351 xsu_fpu_to_xregs_xsave(xsu_prx_t *prx, const xsu_fpu_t *fpu)
352 {
353 	prx->xp_xsave->prx_fx_fcw = FPU_CW_INIT;
354 	prx->xp_xsave->prx_fx_mxcsr = SSE_MXCSR_INIT;
355 	for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
356 		(void) memcpy(&prx->xp_xsave->prx_fx_xmm[i],
357 		    &fpu->xf_reg[i]._l[0], XSU_XMM_U32 * sizeof (uint32_t));
358 	}
359 
360 	prx->xp_xsave->prx_xsh_xstate_bv = XFEATURE_LEGACY_FP |
361 	    XFEATURE_SSE;
362 	if (prx->xp_hwsup >= XSU_YMM) {
363 		prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX;
364 	}
365 
366 	if (prx->xp_hwsup >= XSU_ZMM) {
367 		prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX512;
368 	}
369 }
370 
371 static void
372 xsu_fpu_to_xregs_ymm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
373 {
374 	/* Copy the upper 128-bits to the YMM save area */
375 	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
376 		(void) memcpy(&prx->xp_ymm->prx_ymm[i],
377 		    &fpu->xf_reg[i]._l[XSU_XMM_U32],
378 		    XSU_XMM_U32 * sizeof (uint32_t));
379 	}
380 }
381 
382 static void
383 xsu_fpu_to_xregs_zmm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
384 {
385 	/* The lower 16 regs are only 256-bit, the upper are 512-bit */
386 	for (uint32_t i = 0; i < MIN(XSU_MAX_ZMM, 16); i++) {
387 		(void) memcpy(&prx->xp_zmm->prx_zmm[i],
388 		    &fpu->xf_reg[i]._l[XSU_YMM_U32],
389 		    XSU_YMM_U32 * sizeof (uint32_t));
390 	}
391 
392 #ifdef __amd64
393 	for (uint32_t i = 16; i < XSU_MAX_ZMM; i++) {
394 		(void) memcpy(&prx->xp_hi_zmm->prx_hi_zmm[i - 16],
395 		    &fpu->xf_reg[i]._l[0],
396 		    XSU_ZMM_U32 * sizeof (uint32_t));
397 	}
398 #endif
399 
400 	(void) memcpy(prx->xp_opmask->prx_opmask, fpu->xf_opmask,
401 	    sizeof (prx->xp_opmask->prx_opmask));
402 }
403 
404 
405 void
406 xsu_fpu_to_xregs(const xsu_fpu_t *fpu, uint32_t hwsup, prxregset_t **prxp,
407     size_t *sizep)
408 {
409 	uint32_t ninfo = 1, curinfo;
410 	size_t len = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) +
411 	    sizeof (prxregset_xsave_t);
412 	prxregset_hdr_t *hdr;
413 	uint32_t off;
414 	xsu_prx_t prx;
415 
416 	if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
417 		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
418 	}
419 
420 	if (hwsup >= XSU_YMM) {
421 		len += sizeof (prxregset_info_t) + sizeof (prxregset_ymm_t);
422 		ninfo++;
423 	}
424 
425 	if (hwsup >= XSU_ZMM) {
426 		len += 3 * sizeof (prxregset_info_t) +
427 		    sizeof (prxregset_opmask_t) + sizeof (prxregset_zmm_t) +
428 		    sizeof (prxregset_hi_zmm_t);
429 		ninfo += 3;
430 	}
431 
432 	hdr = calloc(1, len);
433 	if (hdr == NULL) {
434 		err(EXIT_FAILURE, "failed to allocate prxregset_t (%zu bytes)",
435 		    len);
436 	}
437 	(void) memset(&prx, 0, sizeof (prx));
438 	prx.xp_hwsup = hwsup;
439 
440 #ifdef __amd64
441 	VERIFY3U(len, <=, UINT32_MAX);
442 #endif	/* __amd64 */
443 	hdr->pr_type = PR_TYPE_XSAVE;
444 	hdr->pr_size = (uint32_t)len;
445 	hdr->pr_ninfo = ninfo;
446 
447 	curinfo = 0;
448 	off = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) * ninfo;
449 	hdr->pr_info[curinfo].pri_type = PRX_INFO_XSAVE;
450 	hdr->pr_info[curinfo].pri_size = sizeof (prxregset_xsave_t);
451 	hdr->pr_info[curinfo].pri_offset = off;
452 	prx.xp_xsave = (void *)((uintptr_t)hdr + off);
453 	off += sizeof (prxregset_xsave_t);
454 	curinfo++;
455 
456 	if (hwsup >= XSU_YMM) {
457 		hdr->pr_info[curinfo].pri_type = PRX_INFO_YMM;
458 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_ymm_t);
459 		hdr->pr_info[curinfo].pri_offset = off;
460 		prx.xp_ymm = (void *)((uintptr_t)hdr + off);
461 		off += sizeof (prxregset_ymm_t);
462 		curinfo++;
463 	}
464 
465 	if (hwsup >= XSU_ZMM) {
466 		hdr->pr_info[curinfo].pri_type = PRX_INFO_OPMASK;
467 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_opmask_t);
468 		hdr->pr_info[curinfo].pri_offset = off;
469 		prx.xp_opmask = (void *)((uintptr_t)hdr + off);
470 		off += sizeof (prxregset_opmask_t);
471 		curinfo++;
472 
473 		hdr->pr_info[curinfo].pri_type = PRX_INFO_ZMM;
474 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_zmm_t);
475 		hdr->pr_info[curinfo].pri_offset = off;
476 		prx.xp_zmm = (void *)((uintptr_t)hdr + off);
477 		off += sizeof (prxregset_zmm_t);
478 		curinfo++;
479 
480 		hdr->pr_info[curinfo].pri_type = PRX_INFO_HI_ZMM;
481 		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_hi_zmm_t);
482 		hdr->pr_info[curinfo].pri_offset = off;
483 		prx.xp_hi_zmm = (void *)((uintptr_t)hdr + off);
484 		off += sizeof (prxregset_hi_zmm_t);
485 		curinfo++;
486 	}
487 
488 	xsu_fpu_to_xregs_xsave(&prx, fpu);
489 	if (hwsup >= XSU_YMM) {
490 		xsu_fpu_to_xregs_ymm(&prx, fpu);
491 	}
492 
493 	if (hwsup >= XSU_ZMM) {
494 		xsu_fpu_to_xregs_zmm(&prx, fpu);
495 	}
496 
497 	*prxp = (prxregset_t *)hdr;
498 	*sizep = len;
499 }
500 
501 /*
502  * This pairs with xsu_proc_finish() below. The goal is to allow us to inject
503  * state after hitting a breakpoint, which is generally used right before
504  * something wants to print data.
505  */
506 void
507 xsu_proc_bkpt(xsu_proc_t *xp)
508 {
509 	int perr;
510 	struct ps_prochandle *P;
511 	char *const argv[3] = { xp->xp_prog, xp->xp_arg, NULL };
512 	GElf_Sym sym;
513 
514 	P = Pcreate(xp->xp_prog, argv, &perr, NULL, 0);
515 	if (P == NULL) {
516 		errx(EXIT_FAILURE, "failed to create %s: %s", xp->xp_prog,
517 		    Pcreate_error(perr));
518 	}
519 
520 	xp->xp_proc = P;
521 	(void) Punsetflags(P, PR_RLC);
522 	if (Psetflags(P, PR_KLC | PR_BPTADJ) != 0) {
523 		int e = errno;
524 		Prelease(P, PRELEASE_KILL);
525 		errc(EXIT_FAILURE, e, "failed to set PR_KLC | PR_BPTADJ flags");
526 	}
527 
528 	if (Pxlookup_by_name(P, LM_ID_BASE, xp->xp_object, xp->xp_symname, &sym,
529 	    NULL) != 0) {
530 		err(EXIT_FAILURE, "failed to find %s`%s", xp->xp_object,
531 		    xp->xp_symname);
532 	}
533 
534 	if (Pfault(P, FLTBPT, 1) != 0) {
535 		errx(EXIT_FAILURE, "failed to set the FLTBPT disposition");
536 	}
537 
538 	xp->xp_addr = sym.st_value;
539 	if (Psetbkpt(P, sym.st_value, &xp->xp_instr) != 0) {
540 		err(EXIT_FAILURE, "failed to set breakpoint on xsu_getfpu "
541 		    "(0x%" PRIx64 ")", sym.st_value);
542 	}
543 
544 	if (Psetrun(P, 0, 0) != 0) {
545 		err(EXIT_FAILURE, "failed to resume running our target");
546 	}
547 
548 	if (Pwait(P, xsu_proc_timeout) != 0) {
549 		err(EXIT_FAILURE, "%s did not hit our expected breakpoint",
550 		    argv[1]);
551 	}
552 }
553 
554 /*
555  * Run a process to completion and get its wait exit status.
556  */
557 void
558 xsu_proc_finish(xsu_proc_t *xp)
559 {
560 	pid_t pid = Ppsinfo(xp->xp_proc)->pr_pid;
561 
562 	if (Pdelbkpt(xp->xp_proc, xp->xp_addr, xp->xp_instr) != 0) {
563 		err(EXIT_FAILURE, "failed to delete %s`%s() breakpoint",
564 		    xp->xp_object, xp->xp_symname);
565 	}
566 
567 	if (Psetrun(xp->xp_proc, 0, PRCFAULT) != 0) {
568 		err(EXIT_FAILURE, "failed to resume running our target");
569 	}
570 
571 	if (waitpid(pid, &xp->xp_wait, 0) != pid) {
572 		err(EXIT_FAILURE, "failed to get our child processes's (%"
573 		    _PRIdID "), wait info", pid);
574 	}
575 
576 	if (WIFEXITED(xp->xp_wait) == 0) {
577 		errx(EXIT_FAILURE, "our child process didn't actually exit!");
578 	}
579 
580 	Pfree(xp->xp_proc);
581 	xp->xp_proc = NULL;
582 }
583 
584 void
585 xsu_fpregset_xmm_set(fpregset_t *fpr, uint32_t seed)
586 {
587 	size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
588 	for (uint32_t i = 0; i < nregs; i++) {
589 		upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
590 		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
591 			u128->_l[u32] = seed;
592 		}
593 	}
594 }
595 
596 void
597 xsu_xregs_xmm_set(prxregset_t *prx, uint32_t seed)
598 {
599 	prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
600 	prxregset_xsave_t *xsave = NULL;
601 
602 	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
603 		if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
604 			xsave = (void *)((uintptr_t)prx +
605 			    hdr->pr_info[i].pri_offset);
606 			break;
607 		}
608 	}
609 
610 	if (xsave == NULL) {
611 		errx(EXIT_FAILURE, "asked to set xsave %%xmm regs, but no "
612 		    "xsave info present");
613 	}
614 
615 	size_t nregs = ARRAY_SIZE(xsave->prx_fx_xmm);
616 	for (uint32_t i = 0; i < nregs; i++) {
617 		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
618 			xsave->prx_fx_xmm[i]._l[u32] = seed;
619 		}
620 	}
621 }
622 
623 static const prxregset_info_t *
624 xsu_xregs_find_comp(const prxregset_hdr_t *hdr, uint32_t comp, uintptr_t *datap)
625 {
626 	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
627 		if (hdr->pr_info[i].pri_type == comp) {
628 			*datap = (uintptr_t)hdr + hdr->pr_info[i].pri_offset;
629 			return (&hdr->pr_info[i]);
630 		}
631 	}
632 
633 	return (NULL);
634 }
635 
636 boolean_t
637 xsu_xregs_comp_equal(const prxregset_t *src, const prxregset_t *dest,
638     uint32_t comp)
639 {
640 	const prxregset_hdr_t *shdr = (prxregset_hdr_t *)src;
641 	const prxregset_hdr_t *dhdr = (prxregset_hdr_t *)dest;
642 	const prxregset_info_t *sinfo = NULL, *dinfo = NULL;
643 	uintptr_t sdata, ddata;
644 
645 	sinfo = xsu_xregs_find_comp(shdr, comp, &sdata);
646 	if (sinfo == NULL) {
647 		warnx("source xregs missing component %u", comp);
648 		return (B_FALSE);
649 	}
650 
651 	dinfo = xsu_xregs_find_comp(dhdr, comp, &ddata);
652 	if (dinfo == NULL) {
653 		warnx("destination xregs missing component %u", comp);
654 		return (B_FALSE);
655 	}
656 
657 	if (sinfo->pri_size != dinfo->pri_size) {
658 		warnx("source xregs length 0x%x does not match dest xregs 0x%x",
659 		    sinfo->pri_size, dinfo->pri_size);
660 	}
661 
662 	if (bcmp((void *)sdata, (void *)ddata, sinfo->pri_size) != 0) {
663 		warnx("component data differs: dumping!");
664 		for (uint32_t i = 0; i < sinfo->pri_offset; i++) {
665 			const uint8_t *su8 = (uint8_t *)sdata;
666 			const uint8_t *du8 = (uint8_t *)ddata;
667 
668 			if (su8[i] != du8[i]) {
669 				(void) fprintf(stderr,
670 				    "src[%u] = 0x%2x\tdst[%u] = 0x%x\n",
671 				    i, su8[i], i, du8[i]);
672 			}
673 		}
674 
675 		return (B_FALSE);
676 	}
677 
678 	return (B_TRUE);
679 }
680 
681 boolean_t
682 xsu_fpregs_cmp(const fpregset_t *fpr, const prxregset_t *prx)
683 {
684 	boolean_t valid = B_TRUE;
685 	const prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
686 	const prxregset_xsave_t *xsave = NULL;
687 	uint16_t fpr_cw, fpr_sw;
688 
689 	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
690 		if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
691 			xsave = (void *)((uintptr_t)prx +
692 			    hdr->pr_info[i].pri_offset);
693 			break;
694 		}
695 	}
696 
697 	if (xsave == NULL) {
698 		warnx("xregs missing xsave component for fpregs comparison");
699 		return (B_FALSE);
700 	}
701 
702 	/*
703 	 * First check the XMM registers because those don't require ifdefs,
704 	 * thankfully.
705 	 */
706 	size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
707 	for (size_t i = 0; i < nregs; i++) {
708 		const upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
709 		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++) {
710 			if (u128->_l[u32] != xsave->prx_fx_xmm[i]._l[u32]) {
711 				valid = B_FALSE;
712 				(void) fprintf(stderr, "fpregset xmm[%u] "
713 				    "u32[%u] does not match xsave, fpregset: "
714 				    "0x%x, xsave: 0x%x\n", i, u32,
715 				    u128->_l[u32],
716 				    xsave->prx_fx_xmm[i]._l[u32]);
717 			}
718 		}
719 	}
720 
721 	if (xsave->prx_fx_mxcsr != fpr->fp_reg_set.fpchip_state.mxcsr) {
722 		valid = B_FALSE;
723 		(void) fprintf(stderr, "mxcsr mismatched: fpregset: 0x%x, "
724 		    "xsave: 0x%x\n", fpr->fp_reg_set.fpchip_state.mxcsr,
725 		    xsave->prx_fx_mxcsr);
726 	}
727 
728 	/*
729 	 * Extract the basic x87 state. This requires ifdefs because the 32-bit
730 	 * ABI here is a bit, particular. The 32-bit fpregs is the mcontext_t
731 	 * struct which is mostly opaque and we need to use the ieeefp.h types
732 	 * which are only visible for ILP32. It also treats 16-bit values as
733 	 * 32-bit ones, hence masking below.
734 	 */
735 #ifdef __amd64
736 	fpr_cw = fpr->fp_reg_set.fpchip_state.cw;
737 	fpr_sw = fpr->fp_reg_set.fpchip_state.sw;
738 #else	/* !__amd64 (__i386) */
739 	struct _fpstate fps;
740 
741 	(void) memcpy(&fps, &fpr->fp_reg_set.fpchip_state, sizeof (fps));
742 	fpr_cw = fps.cw & 0xffff;
743 	fpr_sw = fps.sw & 0xffff;
744 #endif	/* __amd64 */
745 
746 	if (fpr_cw != xsave->prx_fx_fcw) {
747 		valid = B_FALSE;
748 		(void) fprintf(stderr, "x87 cw mismatched: fpregset: 0x%x, "
749 		    "xsave: 0x%x\n", fpr_cw, xsave->prx_fx_fcw);
750 	}
751 
752 	if (fpr_sw != xsave->prx_fx_fsw) {
753 		valid = B_FALSE;
754 		(void) fprintf(stderr, "x87 sw mismatched: fpregset: 0x%x, "
755 		    "xsave: 0x%x\n", fpr_sw, xsave->prx_fx_fsw);
756 	}
757 
758 	return (valid);
759 }
760 
/*
 * Point a ucontext_t's stack at a zeroed buffer of the minimum thread stack
 * size. The buffer is allocated on the first call and that very same buffer is
 * handed out on every subsequent call; it is never freed. Failing to determine
 * the size or to allocate the buffer is fatal.
 */
void
xsu_ustack_alloc(ucontext_t *ctx)
{
	static void *stack_buf = NULL;
	static size_t stack_len = 0;

	if (stack_len == 0) {
		const long min = sysconf(_SC_THREAD_STACK_MIN);

		if (min == -1) {
			err(EXIT_FAILURE, "failed to get minimum stack size");
		}
		stack_len = (size_t)min;

		stack_buf = calloc(stack_len, sizeof (uint8_t));
		if (stack_buf == NULL) {
			err(EXIT_FAILURE, "failed to allocate stack buffer");
		}
	}

	ctx->uc_stack.ss_flags = 0;
	ctx->uc_stack.ss_sp = stack_buf;
	ctx->uc_stack.ss_size = stack_len;
}
784