/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2023 Oxide Computer Company
 */

/*
 * This file implements various utility functions we use for the xsave tests.
 */

#include <string.h>
#include <strings.h>
#include <sys/auxv.h>
#include <sys/sysmacros.h>
#include <err.h>
#include <stdlib.h>
#include <procfs.h>
#include <sys/x86_archext.h>
#include <unistd.h>
#include <errno.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <sys/debug.h>
#include <ieeefp.h>

#include "xsave_util.h"

static uint_t xsu_proc_timeout = 60 * 1000; /* 60s in ms */

/*
 * Determine the level of xsave hardware support that is present.
 */
uint32_t
xsu_hwsupport(void)
{
	uint_t isa[3];
	uint_t nisa = getisax(isa, ARRAY_SIZE(isa));

	if (nisa != ARRAY_SIZE(isa)) {
		errx(EXIT_FAILURE, "did not get all %zu hwcap values, found %u",
		    ARRAY_SIZE(isa), nisa);
	}

	if ((isa[0] & AV_386_XSAVE) == 0) {
		errx(EXIT_FAILURE, "xsave not present: this test should have "
		    "been skipped");
	}

	if ((isa[1] & AV_386_2_AVX512F) != 0) {
		warnx("found %%zmm support");
		return (XSU_ZMM);
	}

	if ((isa[0] & AV_386_AVX) != 0) {
		warnx("found %%ymm support");
		return (XSU_YMM);
	}

	errx(EXIT_FAILURE, "no non-XMM xsave state found: this test should "
	    "have been skipped");
}

/*
 * Fill all the valid regions of an FPU by treating each vector register as a
 * series of uint32_t values, starting from the given value.
 */
void
xsu_fill(xsu_fpu_t *fpu, uint32_t level, uint32_t start)
{
	(void) memset(fpu, 0, sizeof (xsu_fpu_t));

	switch (level) {
	default:
		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", level);
	case XSU_YMM:
		for (uint32_t regno = 0; regno < XSU_MAX_YMM; regno++) {
			for (uint32_t u32 = 0; u32 < XSU_YMM_U32; u32++,
			    start++) {
				fpu->xf_reg[regno]._l[u32] = start;
			}
		}
		break;
	case XSU_ZMM:
		for (uint32_t regno = 0; regno < XSU_MAX_ZMM; regno++) {
			for (uint32_t u32 = 0; u32 < XSU_ZMM_U32; u32++,
			    start++) {
				fpu->xf_reg[regno]._l[u32] = start;
			}
		}
		for (uint32_t regno = 0; regno < ARRAY_SIZE(fpu->xf_opmask);
		    regno++) {
			uint64_t val = start | (((uint64_t)start + 1) << 32);
			fpu->xf_opmask[regno] = val;
			start += 2;
		}
		break;
	}
}

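/*
 * Copy the low 128 bits (the %xmm portion) of each test vector register into
 * the fpchip_state embedded in the ucontext_t.
 */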
static void
xsu_overwrite_uctx_xmm(ucontext_t *uctx, const xsu_fpu_t *fpu)
{
	struct _fpchip_state *fp;

	fp = &uctx->uc_mcontext.fpregs.fp_reg_set.fpchip_state;
	for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
		(void) memcpy(&fp->xmm[i], &fpu->xf_reg[i]._l[0],
		    XSU_XMM_U32 * sizeof (uint32_t));
	}
}

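/*
 * Copy the upper 128 bits of each %ymm register (uint32_t indexes
 * [XSU_XMM_U32, XSU_YMM_U32)) into the prxregset_ymm_t at the given address
 * in the uc_xsave buffer.
 */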
static void
xsu_overwrite_uctx_ymm(uintptr_t arg, const xsu_fpu_t *fpu)
{
	prxregset_ymm_t *ymm = (void *)arg;

	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
		(void) memcpy(&ymm->prx_ymm[i]._l[0],
		    &fpu->xf_reg[i]._l[XSU_XMM_U32],
		    XSU_XMM_U32 * sizeof (uint32_t));
	}
}

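/*
 * Copy the upper 256 bits of each of the low %zmm registers into the
 * prxregset_zmm_t at the given address in the uc_xsave buffer.
 */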
static void
xsu_overwrite_uctx_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
{
	prxregset_zmm_t *zmm = (void *)arg;

	/*
	 * Because this component only covers the low zmm registers, we loop
	 * using the maximum ymm register count: that is how many registers
	 * fit in the low zmm component, not the full zmm count.
	 */
	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
		(void) memcpy(&zmm->prx_zmm[i]._l[0],
		    &fpu->xf_reg[i]._l[XSU_YMM_U32],
		    XSU_YMM_U32 * sizeof (uint32_t));
	}
}

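/*
 * Copy the full 512-bit contents of the high %zmm registers into the
 * prxregset_hi_zmm_t at the given address. These registers only exist for
 * 64-bit processes, so a 32-bit caller aborts.
 */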
static void
xsu_overwrite_uctx_hi_zmm(uintptr_t arg, const xsu_fpu_t *fpu)
{
#ifdef __amd64
	prxregset_hi_zmm_t *zmm = (void *)arg;

	for (uint32_t i = XSU_MAX_YMM; i < XSU_MAX_ZMM; i++) {
		(void) memcpy(&zmm->prx_hi_zmm[i - XSU_MAX_YMM]._l[0],
		    &fpu->xf_reg[i]._l[0],
		    XSU_ZMM_U32 * sizeof (uint32_t));
	}
#else /* !__amd64 */
	warnx("attempted to set High ZMM registers on a 32-bit process!");
	abort();
#endif /* __amd64 */
}

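/*
 * Replace the FPU contents of a ucontext_t with the values in the given test
 * FPU, allocating a new uc_xsave buffer that covers whatever extended
 * components the hardware support level calls for and hanging it off the
 * ucontext_t.
 */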
void
xsu_overwrite_uctx(ucontext_t *uctx, const xsu_fpu_t *fpu, uint32_t hwsup)
{
	size_t xsave_size = sizeof (uc_xsave_t);
	void *new_buf;
	uc_xsave_t *ucs;
	uintptr_t write_ptr;

	if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
	}

	if (hwsup >= XSU_YMM) {
		xsave_size += sizeof (prxregset_ymm_t);
	}

	if (hwsup >= XSU_ZMM) {
		xsave_size += sizeof (prxregset_zmm_t);
		xsave_size += sizeof (prxregset_opmask_t);
		if (XSU_MAX_ZMM > 16) {
			xsave_size += sizeof (prxregset_hi_zmm_t);
		}
	}

	new_buf = calloc(1, xsave_size);
	if (new_buf == NULL) {
		errx(EXIT_FAILURE, "failed to allocate xsave buf");
	}
	ucs = new_buf;
	ucs->ucx_vers = UC_XSAVE_VERS;
	ucs->ucx_len = xsave_size;
	if (hwsup >= XSU_YMM) {
		ucs->ucx_bv |= XFEATURE_AVX;
	}

	if (hwsup >= XSU_ZMM) {
		ucs->ucx_bv |= XFEATURE_AVX512_OPMASK | XFEATURE_AVX512_ZMM;
		if (XSU_MAX_ZMM > 16)
			ucs->ucx_bv |= XFEATURE_AVX512_HI_ZMM;
	}

	/*
	 * At this point we have rigged things up. XMM values go in the
	 * ucontext_t itself. After that we must write things out in the kernel
	 * signal order. Note, the XMM state is not set in the bit-vector
	 * because we don't actually use the xsave pieces for it: the
	 * ucontext_t ABI always has the xmm state present. See
	 * uts/intel/os/fpu.c's big theory statement for more info.
	 */
	xsu_overwrite_uctx_xmm(uctx, fpu);
	write_ptr = (uintptr_t)new_buf + sizeof (uc_xsave_t);
	if (hwsup >= XSU_YMM) {
		xsu_overwrite_uctx_ymm(write_ptr, fpu);
		write_ptr += sizeof (prxregset_ymm_t);
	}

	if (hwsup >= XSU_ZMM) {
		(void) memcpy((void *)write_ptr, fpu->xf_opmask,
		    sizeof (fpu->xf_opmask));
		write_ptr += sizeof (fpu->xf_opmask);
		xsu_overwrite_uctx_zmm(write_ptr, fpu);
		write_ptr += sizeof (prxregset_zmm_t);
		if (XSU_MAX_ZMM > 16) {
			xsu_overwrite_uctx_hi_zmm(write_ptr, fpu);
			write_ptr += sizeof (prxregset_hi_zmm_t);
		}
	}

	uctx->uc_xsave = (long)(uintptr_t)new_buf;
}

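/*
 * Compare the first nu32 uint32_t values of a single vector register against
 * the expected contents, warning about every index that differs.
 */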
static boolean_t
xsu_check_vector(const upad512_t *src, const upad512_t *chk, uint32_t regno,
    uint32_t nu32)
{
	boolean_t valid = B_TRUE;

	for (uint32_t i = 0; i < nu32; i++) {
		if (src->_l[i] != chk->_l[i]) {
			warnx("vec[%u] u32 %u differs: expected 0x%x, "
			    "found 0x%x", regno, i, src->_l[i], chk->_l[i]);
			valid = B_FALSE;
		}
	}

	return (valid);
}

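/*
 * Check whether two test FPU images agree for every component implied by the
 * given hardware support level, warning about each difference found.
 */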
boolean_t
xsu_same(const xsu_fpu_t *src, const xsu_fpu_t *check, uint32_t hwsup)
{
	boolean_t valid = B_TRUE;

	switch (hwsup) {
	default:
		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
	case XSU_YMM:
		for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
			if (!xsu_check_vector(&src->xf_reg[i],
			    &check->xf_reg[i], i, XSU_YMM_U32)) {
				valid = B_FALSE;
			}
		}
		break;
	case XSU_ZMM:
		for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
			if (!xsu_check_vector(&src->xf_reg[i],
			    &check->xf_reg[i], i, XSU_ZMM_U32)) {
				valid = B_FALSE;
			}
		}
		for (uint32_t i = 0; i < ARRAY_SIZE(src->xf_opmask); i++) {
			if (src->xf_opmask[i] != check->xf_opmask[i]) {
				warnx("mask[%u] differs: expected 0x%" PRIx64
				    ", found 0x%" PRIx64, i, src->xf_opmask[i],
				    check->xf_opmask[i]);
				valid = B_FALSE;
			}
		}
		break;
	}
	return (valid);
}

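/*
 * A thread entry point that just sleeps forever.
 */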
void *
xsu_sleeper_thread(void *arg __unused)
{
	for (;;) {
		(void) sleep(100);
	}
	return (NULL);
}

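/*
 * Print a single vector register as rows of four uint32_t values, most
 * significant value first within each row.
 */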
static void
xsu_dump_vector(FILE *f, const upad512_t *reg, uint32_t nu32, const char *name,
    uint32_t idx)
{
	VERIFY3U(nu32 % 4, ==, 0);
	for (uint32_t i = 0; i < nu32; i += 4) {
		(void) fprintf(f, "%s[%02u] [%02u:%02u] = { 0x%08x 0x%08x "
		    "0x%08x 0x%08x }\n", name, idx, i + 3, i, reg->_l[i + 3],
		    reg->_l[i + 2], reg->_l[i + 1], reg->_l[i]);
	}
}

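/*
 * Dump the contents of a test FPU in human-readable form to the given stream,
 * covering whichever registers the hardware support level includes.
 */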
void
xsu_dump(FILE *f, const xsu_fpu_t *fpu, uint32_t hwsup)
{

	switch (hwsup) {
	default:
		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
	case XSU_YMM:
		for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
			xsu_dump_vector(f, &fpu->xf_reg[i], XSU_YMM_U32,
			    "ymm", i);
		}
		break;
	case XSU_ZMM:
		for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) {
			xsu_dump_vector(f, &fpu->xf_reg[i], XSU_ZMM_U32,
			    "zmm", i);
		}

		for (uint32_t i = 0; i < ARRAY_SIZE(fpu->xf_opmask); i++) {
			(void) fprintf(f, "%%k%u 0x%016" PRIx64 "\n", i,
			    fpu->xf_opmask[i]);
		}
		break;
	}
}

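/*
 * Tracking structure used while constructing a prxregset_t: it records the
 * hardware support level and pointers to each component's data within the
 * allocated buffer.
 */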
typedef struct xsu_prx {
	uint32_t xp_hwsup;
	prxregset_xsave_t *xp_xsave;
	prxregset_ymm_t *xp_ymm;
	prxregset_opmask_t *xp_opmask;
	prxregset_zmm_t *xp_zmm;
	prxregset_hi_zmm_t *xp_hi_zmm;
} xsu_prx_t;

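/*
 * Fill in the base xsave component: default x87 and SSE control words, the
 * %xmm portion of each vector register, and an xstate bit-vector that
 * advertises every component we intend to populate.
 */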
static void
xsu_fpu_to_xregs_xsave(xsu_prx_t *prx, const xsu_fpu_t *fpu)
{
	prx->xp_xsave->prx_fx_fcw = FPU_CW_INIT;
	prx->xp_xsave->prx_fx_mxcsr = SSE_MXCSR_INIT;
	for (uint32_t i = 0; i < XSU_MAX_XMM; i++) {
		(void) memcpy(&prx->xp_xsave->prx_fx_xmm[i],
		    &fpu->xf_reg[i]._l[0], XSU_XMM_U32 * sizeof (uint32_t));
	}

	prx->xp_xsave->prx_xsh_xstate_bv = XFEATURE_LEGACY_FP |
	    XFEATURE_SSE;
	if (prx->xp_hwsup >= XSU_YMM) {
		prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX;
	}

	if (prx->xp_hwsup >= XSU_ZMM) {
		prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX512;
	}
}

static void
xsu_fpu_to_xregs_ymm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
{
	/* Copy the upper 128-bits to the YMM save area */
	for (uint32_t i = 0; i < XSU_MAX_YMM; i++) {
		(void) memcpy(&prx->xp_ymm->prx_ymm[i],
		    &fpu->xf_reg[i]._l[XSU_XMM_U32],
		    XSU_XMM_U32 * sizeof (uint32_t));
	}
}

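/*
 * Fill in the AVX-512 components: the upper 256 bits of the low %zmm
 * registers, the full contents of the high %zmm registers (64-bit builds
 * only), and the opmask (%k) registers.
 */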
static void
xsu_fpu_to_xregs_zmm(xsu_prx_t *prx, const xsu_fpu_t *fpu)
{
	/* The lower 16 regs are only 256-bit, the upper are 512-bit */
	for (uint32_t i = 0; i < MIN(XSU_MAX_ZMM, 16); i++) {
		(void) memcpy(&prx->xp_zmm->prx_zmm[i],
		    &fpu->xf_reg[i]._l[XSU_YMM_U32],
		    XSU_YMM_U32 * sizeof (uint32_t));
	}

#ifdef __amd64
	for (uint32_t i = 16; i < XSU_MAX_ZMM; i++) {
		(void) memcpy(&prx->xp_hi_zmm->prx_hi_zmm[i - 16],
		    &fpu->xf_reg[i]._l[0],
		    XSU_ZMM_U32 * sizeof (uint32_t));
	}
#endif

	(void) memcpy(prx->xp_opmask->prx_opmask, fpu->xf_opmask,
	    sizeof (prx->xp_opmask->prx_opmask));
}

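/*
 * Translate a test FPU into a freshly allocated prxregset_t, building the
 * header, the per-component information entries, and the component data for
 * the given hardware support level. The caller is responsible for freeing the
 * returned buffer.
 */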
void
xsu_fpu_to_xregs(const xsu_fpu_t *fpu, uint32_t hwsup, prxregset_t **prxp,
    size_t *sizep)
{
	uint32_t ninfo = 1, curinfo;
	size_t len = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) +
	    sizeof (prxregset_xsave_t);
	prxregset_hdr_t *hdr;
	uint32_t off;
	xsu_prx_t prx;

	if (hwsup != XSU_YMM && hwsup != XSU_ZMM) {
		errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup);
	}

	if (hwsup >= XSU_YMM) {
		len += sizeof (prxregset_info_t) + sizeof (prxregset_ymm_t);
		ninfo++;
	}

	if (hwsup >= XSU_ZMM) {
		len += 3 * sizeof (prxregset_info_t) +
		    sizeof (prxregset_opmask_t) + sizeof (prxregset_zmm_t) +
		    sizeof (prxregset_hi_zmm_t);
		ninfo += 3;
	}

	hdr = calloc(1, len);
	if (hdr == NULL) {
		err(EXIT_FAILURE, "failed to allocate prxregset_t (%zu bytes)",
		    len);
	}
	(void) memset(&prx, 0, sizeof (prx));
	prx.xp_hwsup = hwsup;

#ifdef __amd64
	VERIFY3U(len, <=, UINT32_MAX);
#endif /* __amd64 */
	hdr->pr_type = PR_TYPE_XSAVE;
	hdr->pr_size = (uint32_t)len;
	hdr->pr_ninfo = ninfo;

	curinfo = 0;
	off = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) * ninfo;
	hdr->pr_info[curinfo].pri_type = PRX_INFO_XSAVE;
	hdr->pr_info[curinfo].pri_size = sizeof (prxregset_xsave_t);
	hdr->pr_info[curinfo].pri_offset = off;
	prx.xp_xsave = (void *)((uintptr_t)hdr + off);
	off += sizeof (prxregset_xsave_t);
	curinfo++;

	if (hwsup >= XSU_YMM) {
		hdr->pr_info[curinfo].pri_type = PRX_INFO_YMM;
		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_ymm_t);
		hdr->pr_info[curinfo].pri_offset = off;
		prx.xp_ymm = (void *)((uintptr_t)hdr + off);
		off += sizeof (prxregset_ymm_t);
		curinfo++;
	}

	if (hwsup >= XSU_ZMM) {
		hdr->pr_info[curinfo].pri_type = PRX_INFO_OPMASK;
		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_opmask_t);
		hdr->pr_info[curinfo].pri_offset = off;
		prx.xp_opmask = (void *)((uintptr_t)hdr + off);
		off += sizeof (prxregset_opmask_t);
		curinfo++;

		hdr->pr_info[curinfo].pri_type = PRX_INFO_ZMM;
		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_zmm_t);
		hdr->pr_info[curinfo].pri_offset = off;
		prx.xp_zmm = (void *)((uintptr_t)hdr + off);
		off += sizeof (prxregset_zmm_t);
		curinfo++;

		hdr->pr_info[curinfo].pri_type = PRX_INFO_HI_ZMM;
		hdr->pr_info[curinfo].pri_size = sizeof (prxregset_hi_zmm_t);
		hdr->pr_info[curinfo].pri_offset = off;
		prx.xp_hi_zmm = (void *)((uintptr_t)hdr + off);
		off += sizeof (prxregset_hi_zmm_t);
		curinfo++;
	}

	xsu_fpu_to_xregs_xsave(&prx, fpu);
	if (hwsup >= XSU_YMM) {
		xsu_fpu_to_xregs_ymm(&prx, fpu);
	}

	if (hwsup >= XSU_ZMM) {
		xsu_fpu_to_xregs_zmm(&prx, fpu);
	}

	*prxp = (prxregset_t *)hdr;
	*sizep = len;
}

/*
 * This pairs with xsu_proc_finish() below. The goal is to allow us to inject
 * state after hitting a breakpoint, which is generally used right before
 * something wants to print data.
 */
void
xsu_proc_bkpt(xsu_proc_t *xp)
{
	int perr;
	struct ps_prochandle *P;
	char *const argv[3] = { xp->xp_prog, xp->xp_arg, NULL };
	GElf_Sym sym;

	P = Pcreate(xp->xp_prog, argv, &perr, NULL, 0);
	if (P == NULL) {
		errx(EXIT_FAILURE, "failed to create %s: %s", xp->xp_prog,
		    Pcreate_error(perr));
	}

	xp->xp_proc = P;
	(void) Punsetflags(P, PR_RLC);
	if (Psetflags(P, PR_KLC | PR_BPTADJ) != 0) {
		int e = errno;
		Prelease(P, PRELEASE_KILL);
		errc(EXIT_FAILURE, e, "failed to set PR_KLC | PR_BPTADJ flags");
	}

	if (Pxlookup_by_name(P, LM_ID_BASE, xp->xp_object, xp->xp_symname, &sym,
	    NULL) != 0) {
		err(EXIT_FAILURE, "failed to find %s`%s", xp->xp_object,
		    xp->xp_symname);
	}

	if (Pfault(P, FLTBPT, 1) != 0) {
		errx(EXIT_FAILURE, "failed to set the FLTBPT disposition");
	}

	xp->xp_addr = sym.st_value;
	if (Psetbkpt(P, sym.st_value, &xp->xp_instr) != 0) {
		err(EXIT_FAILURE, "failed to set breakpoint on xsu_getfpu "
		    "(0x%" PRIx64 ")", sym.st_value);
	}

	if (Psetrun(P, 0, 0) != 0) {
		err(EXIT_FAILURE, "failed to resume running our target");
	}

	if (Pwait(P, xsu_proc_timeout) != 0) {
		err(EXIT_FAILURE, "%s did not hit our expected breakpoint",
		    argv[1]);
	}
}

/*
 * Run a process to completion and get its wait exit status.
 */
void
xsu_proc_finish(xsu_proc_t *xp)
{
	pid_t pid = Ppsinfo(xp->xp_proc)->pr_pid;

	if (Pdelbkpt(xp->xp_proc, xp->xp_addr, xp->xp_instr) != 0) {
		err(EXIT_FAILURE, "failed to delete %s`%s() breakpoint",
		    xp->xp_object, xp->xp_symname);
	}

	if (Psetrun(xp->xp_proc, 0, PRCFAULT) != 0) {
		err(EXIT_FAILURE, "failed to resume running our target");
	}

	if (waitpid(pid, &xp->xp_wait, 0) != pid) {
		err(EXIT_FAILURE, "failed to get our child process's (%"
		    _PRIdID ") wait info", pid);
	}

	if (WIFEXITED(xp->xp_wait) == 0) {
		errx(EXIT_FAILURE, "our child process didn't actually exit!");
	}

	Pfree(xp->xp_proc);
	xp->xp_proc = NULL;
}

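/*
 * Fill every %xmm register in an fpregset_t with sequential uint32_t values
 * derived from the given seed.
 */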
void
xsu_fpregset_xmm_set(fpregset_t *fpr, uint32_t seed)
{
	size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
	for (uint32_t i = 0; i < nregs; i++) {
		upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
			u128->_l[u32] = seed;
		}
	}
}

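/*
 * Fill every %xmm register in the xsave component of a prxregset_t with
 * sequential uint32_t values derived from the given seed.
 */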
void
xsu_xregs_xmm_set(prxregset_t *prx, uint32_t seed)
{
	prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
	prxregset_xsave_t *xsave = NULL;

	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
		if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
			xsave = (void *)((uintptr_t)prx +
			    hdr->pr_info[i].pri_offset);
			break;
		}
	}

	if (xsave == NULL) {
		errx(EXIT_FAILURE, "asked to set xsave %%xmm regs, but no "
		    "xsave info present");
	}

	size_t nregs = ARRAY_SIZE(xsave->prx_fx_xmm);
	for (uint32_t i = 0; i < nregs; i++) {
		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) {
			xsave->prx_fx_xmm[i]._l[u32] = seed;
		}
	}
}

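/*
 * Find the information entry for a given component type in a prxregset
 * header, returning it and setting *datap to point at the component's data.
 * Returns NULL if the component is not present.
 */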
static const prxregset_info_t *
xsu_xregs_find_comp(const prxregset_hdr_t *hdr, uint32_t comp, uintptr_t *datap)
{
	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
		if (hdr->pr_info[i].pri_type == comp) {
			*datap = (uintptr_t)hdr + hdr->pr_info[i].pri_offset;
			return (&hdr->pr_info[i]);
		}
	}

	return (NULL);
}

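/*
 * Compare a single component between two prxregset_t structures, dumping any
 * bytes that differ. Both sets must contain the component for the comparison
 * to succeed.
 */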
boolean_t
xsu_xregs_comp_equal(const prxregset_t *src, const prxregset_t *dest,
    uint32_t comp)
{
	const prxregset_hdr_t *shdr = (prxregset_hdr_t *)src;
	const prxregset_hdr_t *dhdr = (prxregset_hdr_t *)dest;
	const prxregset_info_t *sinfo = NULL, *dinfo = NULL;
	uintptr_t sdata, ddata;

	sinfo = xsu_xregs_find_comp(shdr, comp, &sdata);
	if (sinfo == NULL) {
		warnx("source xregs missing component %u", comp);
		return (B_FALSE);
	}

	dinfo = xsu_xregs_find_comp(dhdr, comp, &ddata);
	if (dinfo == NULL) {
		warnx("destination xregs missing component %u", comp);
		return (B_FALSE);
	}

	if (sinfo->pri_size != dinfo->pri_size) {
		warnx("source xregs length 0x%x does not match dest xregs 0x%x",
		    sinfo->pri_size, dinfo->pri_size);
	}

	if (bcmp((void *)sdata, (void *)ddata, sinfo->pri_size) != 0) {
		warnx("component data differs: dumping!");
		for (uint32_t i = 0; i < sinfo->pri_size; i++) {
			const uint8_t *su8 = (uint8_t *)sdata;
			const uint8_t *du8 = (uint8_t *)ddata;

			if (su8[i] != du8[i]) {
				(void) fprintf(stderr,
				    "src[%u] = 0x%02x\tdst[%u] = 0x%02x\n",
				    i, su8[i], i, du8[i]);
			}
		}

		return (B_FALSE);
	}

	return (B_TRUE);
}

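/*
 * Compare the FPU state in an fpregset_t against the xsave component of a
 * prxregset_t: the %xmm registers, %mxcsr, and the x87 control and status
 * words.
 */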
boolean_t
xsu_fpregs_cmp(const fpregset_t *fpr, const prxregset_t *prx)
{
	boolean_t valid = B_TRUE;
	const prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx;
	const prxregset_xsave_t *xsave = NULL;
	uint16_t fpr_cw, fpr_sw;

	for (uint32_t i = 0; i < hdr->pr_ninfo; i++) {
		if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) {
			xsave = (void *)((uintptr_t)prx +
			    hdr->pr_info[i].pri_offset);
			break;
		}
	}

	if (xsave == NULL) {
		warnx("xregs missing xsave component for fpregs comparison");
		return (B_FALSE);
	}

	/*
	 * First check the XMM registers because those don't require ifdefs,
	 * thankfully.
	 */
	size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm);
	for (size_t i = 0; i < nregs; i++) {
		const upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i];
		for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++) {
			if (u128->_l[u32] != xsave->prx_fx_xmm[i]._l[u32]) {
				valid = B_FALSE;
				(void) fprintf(stderr, "fpregset xmm[%zu] "
				    "u32[%u] does not match xsave, fpregset: "
				    "0x%x, xsave: 0x%x\n", i, u32,
				    u128->_l[u32],
				    xsave->prx_fx_xmm[i]._l[u32]);
			}
		}
	}

	if (xsave->prx_fx_mxcsr != fpr->fp_reg_set.fpchip_state.mxcsr) {
		valid = B_FALSE;
		(void) fprintf(stderr, "mxcsr mismatched: fpregset: 0x%x, "
		    "xsave: 0x%x\n", fpr->fp_reg_set.fpchip_state.mxcsr,
		    xsave->prx_fx_mxcsr);
	}

	/*
	 * Extract the basic x87 state. This requires ifdefs because the 32-bit
	 * ABI here is a bit particular. The 32-bit fpregs is the mcontext_t
	 * structure, which is mostly opaque, and we need to use the ieeefp.h
	 * types, which are only visible for ILP32. It also treats 16-bit
	 * values as 32-bit ones, hence the masking below.
	 */
#ifdef __amd64
	fpr_cw = fpr->fp_reg_set.fpchip_state.cw;
	fpr_sw = fpr->fp_reg_set.fpchip_state.sw;
#else /* !__amd64 (__i386) */
	struct _fpstate fps;

	(void) memcpy(&fps, &fpr->fp_reg_set.fpchip_state, sizeof (fps));
	fpr_cw = fps.cw & 0xffff;
	fpr_sw = fps.sw & 0xffff;
#endif /* __amd64 */

	if (fpr_cw != xsave->prx_fx_fcw) {
		valid = B_FALSE;
		(void) fprintf(stderr, "x87 cw mismatched: fpregset: 0x%x, "
		    "xsave: 0x%x\n", fpr_cw, xsave->prx_fx_fcw);
	}

	if (fpr_sw != xsave->prx_fx_fsw) {
		valid = B_FALSE;
		(void) fprintf(stderr, "x87 sw mismatched: fpregset: 0x%x, "
		    "xsave: 0x%x\n", fpr_sw, xsave->prx_fx_fsw);
	}

	return (valid);
}

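/*
 * Set up the alternate stack in a ucontext_t, lazily allocating a single
 * shared buffer of the minimum thread stack size on first use.
 */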
void
xsu_ustack_alloc(ucontext_t *ctx)
{
	static void *stack = NULL;
	static size_t size = 0;

	if (size == 0) {
		long sys = sysconf(_SC_THREAD_STACK_MIN);
		if (sys == -1) {
			err(EXIT_FAILURE, "failed to get minimum stack size");
		}
		size = (size_t)sys;

		stack = calloc(size, sizeof (uint8_t));
		if (stack == NULL) {
			err(EXIT_FAILURE, "failed to allocate stack buffer");
		}
	}

	ctx->uc_stack.ss_size = size;
	ctx->uc_stack.ss_sp = stack;
	ctx->uc_stack.ss_flags = 0;
}