1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2023 Oxide Computer Company 14 */ 15 16 /* 17 * This file implements various utility functions we use for the xsave tests. 18 */ 19 20 #include <string.h> 21 #include <strings.h> 22 #include <sys/auxv.h> 23 #include <sys/sysmacros.h> 24 #include <err.h> 25 #include <stdlib.h> 26 #include <procfs.h> 27 #include <sys/x86_archext.h> 28 #include <unistd.h> 29 #include <errno.h> 30 #include <sys/types.h> 31 #include <sys/wait.h> 32 #include <sys/debug.h> 33 #include <ieeefp.h> 34 35 #include "xsave_util.h" 36 37 static uint_t xsu_proc_timeout = 60 * 1000; /* 60s in ms */ 38 39 /* 40 * Determine if we have the hardware support required for a given level of 41 * hardware support. 42 */ 43 uint32_t 44 xsu_hwsupport(void) 45 { 46 uint_t isa[3]; 47 uint_t nisa = getisax(isa, ARRAY_SIZE(isa)); 48 49 if (nisa != ARRAY_SIZE(isa)) { 50 errx(EXIT_FAILURE, "did not get all %zu hwcap values, found %u", 51 ARRAY_SIZE(isa), nisa); 52 } 53 54 if ((isa[0] & AV_386_XSAVE) == 0) { 55 errx(EXIT_FAILURE, "xsave not present: this test should have " 56 "been skipped"); 57 } 58 59 if ((isa[1] & AV_386_2_AVX512F) != 0) { 60 warnx("found %%zmm support"); 61 return (XSU_ZMM); 62 } 63 64 if ((isa[0] & AV_386_AVX) != 0) { 65 warnx("found %%ymm support"); 66 return (XSU_YMM); 67 } 68 69 errx(EXIT_FAILURE, "no non-XMM xsave state found: this test should " 70 "have been skipped"); 71 } 72 73 /* 74 * Fill all the valid regions of an FPU based on treating the vector register as 75 * a series of uint32_t values and going from there. 76 */ 77 void 78 xsu_fill(xsu_fpu_t *fpu, uint32_t level, uint32_t start) 79 { 80 (void) memset(fpu, 0, sizeof (xsu_fpu_t)); 81 82 switch (level) { 83 default: 84 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", level); 85 case XSU_YMM: 86 for (uint32_t regno = 0; regno < XSU_MAX_YMM; regno++) { 87 for (uint32_t u32 = 0; u32 < XSU_YMM_U32; u32++, 88 start++) { 89 fpu->xf_reg[regno]._l[u32] = start; 90 } 91 } 92 break; 93 case XSU_ZMM: 94 for (uint32_t regno = 0; regno < XSU_MAX_ZMM; regno++) { 95 for (uint32_t u32 = 0; u32 < XSU_ZMM_U32; u32++, 96 start++) { 97 fpu->xf_reg[regno]._l[u32] = start; 98 } 99 } 100 for (uint32_t regno = 0; regno < ARRAY_SIZE(fpu->xf_opmask); 101 regno++) { 102 uint64_t val = start | (((uint64_t)start + 1) << 32); 103 fpu->xf_opmask[regno] = val; 104 start += 2; 105 } 106 break; 107 } 108 } 109 110 static void 111 xsu_overwrite_uctx_xmm(ucontext_t *uctx, const xsu_fpu_t *fpu) 112 { 113 struct _fpchip_state *fp; 114 115 fp = &uctx->uc_mcontext.fpregs.fp_reg_set.fpchip_state; 116 for (uint32_t i = 0; i < XSU_MAX_XMM; i++) { 117 (void) memcpy(&fp->xmm[i], &fpu->xf_reg[i]._l[0], 118 XSU_XMM_U32 * sizeof (uint32_t)); 119 } 120 } 121 122 static void 123 xsu_overwrite_uctx_ymm(uintptr_t arg, const xsu_fpu_t *fpu) 124 { 125 prxregset_ymm_t *ymm = (void *)arg; 126 127 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) { 128 (void) memcpy(&ymm->prx_ymm[i]._l[0], 129 &fpu->xf_reg[i]._l[XSU_XMM_U32], 130 XSU_XMM_U32 * sizeof (uint32_t)); 131 } 132 } 133 134 static void 135 xsu_overwrite_uctx_zmm(uintptr_t arg, const xsu_fpu_t *fpu) 136 { 137 prxregset_zmm_t *zmm = (void *)arg; 138 139 /* 140 * Because this is the low zmm registers, we actually use the max ymm 141 * value as that's what actually fits in the low zmm and not the full 142 * definition. 143 */ 144 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) { 145 (void) memcpy(&zmm->prx_zmm[i]._l[0], 146 &fpu->xf_reg[i]._l[XSU_YMM_U32], 147 XSU_YMM_U32 * sizeof (uint32_t)); 148 } 149 } 150 151 static void 152 xsu_overwrite_uctx_hi_zmm(uintptr_t arg, const xsu_fpu_t *fpu) 153 { 154 #ifdef __amd64 155 prxregset_hi_zmm_t *zmm = (void *)arg; 156 157 for (uint32_t i = XSU_MAX_YMM; i < XSU_MAX_ZMM; i++) { 158 (void) memcpy(&zmm->prx_hi_zmm[i - XSU_MAX_YMM]._l[0], 159 &fpu->xf_reg[i]._l[0], 160 XSU_ZMM_U32 * sizeof (uint32_t)); 161 } 162 #else /* !__amd64 */ 163 warnx("attempted to set High ZMM registers on a 32-bit process!"); 164 abort(); 165 #endif /* __amd64 */ 166 } 167 168 void 169 xsu_overwrite_uctx(ucontext_t *uctx, const xsu_fpu_t *fpu, uint32_t hwsup) 170 { 171 size_t xsave_size = sizeof (uc_xsave_t); 172 void *new_buf; 173 uc_xsave_t *ucs; 174 uintptr_t write_ptr; 175 176 if (hwsup != XSU_YMM && hwsup != XSU_ZMM) { 177 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup); 178 } 179 180 if (hwsup >= XSU_YMM) { 181 xsave_size += sizeof (prxregset_ymm_t); 182 } 183 184 if (hwsup >= XSU_ZMM) { 185 xsave_size += sizeof (prxregset_zmm_t); 186 xsave_size += sizeof (prxregset_opmask_t); 187 if (XSU_MAX_ZMM > 16) { 188 xsave_size += sizeof (prxregset_hi_zmm_t); 189 } 190 } 191 192 new_buf = calloc(1, xsave_size); 193 if (new_buf == NULL) { 194 errx(EXIT_FAILURE, "failed to allocate xsave buf"); 195 } 196 ucs = new_buf; 197 ucs->ucx_vers = UC_XSAVE_VERS; 198 ucs->ucx_len = xsave_size; 199 if (hwsup >= XSU_YMM) { 200 ucs->ucx_bv |= XFEATURE_AVX; 201 } 202 203 if (hwsup >= XSU_ZMM) { 204 ucs->ucx_bv |= XFEATURE_AVX512_OPMASK | XFEATURE_AVX512_ZMM; 205 if (XSU_MAX_ZMM > 16) 206 ucs->ucx_bv |= XFEATURE_AVX512_HI_ZMM; 207 } 208 209 /* 210 * At this point we have rigged things up. XMM values are in the 211 * ucontext_t itself. After that we must write things out in the kernel 212 * signal order. Note, the XMM state is not set in the bit-vector 213 * because well, we don't actually use the xsave pieces for it because o 214 * the ucontext_t ABI has the xmm state always there. See 215 * uts/intel/os/fpu.c's big theory statement for more info. 216 */ 217 xsu_overwrite_uctx_xmm(uctx, fpu); 218 write_ptr = (uintptr_t)new_buf + sizeof (uc_xsave_t); 219 if (hwsup >= XSU_YMM) { 220 xsu_overwrite_uctx_ymm(write_ptr, fpu); 221 write_ptr += sizeof (prxregset_ymm_t); 222 } 223 224 if (hwsup >= XSU_ZMM) { 225 (void) memcpy((void *)write_ptr, fpu->xf_opmask, 226 sizeof (fpu->xf_opmask)); 227 write_ptr += sizeof (fpu->xf_opmask); 228 xsu_overwrite_uctx_zmm(write_ptr, fpu); 229 write_ptr += sizeof (prxregset_zmm_t); 230 if (XSU_MAX_ZMM > 16) { 231 xsu_overwrite_uctx_hi_zmm(write_ptr, fpu); 232 write_ptr += sizeof (prxregset_hi_zmm_t); 233 } 234 } 235 236 uctx->uc_xsave = (long)(uintptr_t)new_buf; 237 } 238 239 static boolean_t 240 xsu_check_vector(const upad512_t *src, const upad512_t *chk, uint32_t regno, 241 uint32_t nu32) 242 { 243 boolean_t valid = B_TRUE; 244 245 for (uint32_t i = 0; i < nu32; i++) { 246 if (src->_l[i] != chk->_l[i]) { 247 warnx("vec[%u] u32 %u differs: expected 0x%x, " 248 "found 0x%x", regno, i, src->_l[i], chk->_l[i]); 249 valid = B_FALSE; 250 } 251 } 252 253 return (valid); 254 } 255 256 boolean_t 257 xsu_same(const xsu_fpu_t *src, const xsu_fpu_t *check, uint32_t hwsup) 258 { 259 boolean_t valid = B_TRUE; 260 261 switch (hwsup) { 262 default: 263 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup); 264 case XSU_YMM: 265 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) { 266 if (!xsu_check_vector(&src->xf_reg[i], 267 &check->xf_reg[i], i, XSU_YMM_U32)) { 268 valid = B_FALSE; 269 } 270 } 271 break; 272 case XSU_ZMM: 273 for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) { 274 if (!xsu_check_vector(&src->xf_reg[i], 275 &check->xf_reg[i], i, XSU_ZMM_U32)) { 276 valid = B_FALSE; 277 } 278 } 279 for (uint32_t i = 0; i < ARRAY_SIZE(src->xf_opmask); i++) { 280 if (src->xf_opmask[i] != check->xf_opmask[i]) { 281 warnx("mask[%u] differs: expected 0x%" PRIx64 282 ", found 0x%" PRIx64, i, src->xf_opmask[i], 283 check->xf_opmask[i]); 284 valid = B_FALSE; 285 } 286 } 287 break; 288 } 289 return (valid); 290 } 291 292 293 void * 294 xsu_sleeper_thread(void *arg __unused) 295 { 296 for (;;) { 297 (void) sleep(100); 298 } 299 return (NULL); 300 } 301 302 static void 303 xsu_dump_vector(FILE *f, const upad512_t *reg, uint32_t nu32, const char *name, 304 uint32_t idx) 305 { 306 VERIFY3U(nu32 % 4, ==, 0); 307 for (uint32_t i = 0; i < nu32; i += 4) { 308 (void) fprintf(f, "%s[%02u] [%02u:%02u] = { 0x%08x 0x%08x " 309 "0x%08x 0x%08x }\n", name, idx, i + 3, i, reg->_l[i + 3], 310 reg->_l[i + 2], reg->_l[i + 1], reg->_l[i]); 311 } 312 } 313 314 void 315 xsu_dump(FILE *f, const xsu_fpu_t *fpu, uint32_t hwsup) 316 { 317 318 switch (hwsup) { 319 default: 320 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup); 321 case XSU_YMM: 322 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) { 323 xsu_dump_vector(f, &fpu->xf_reg[i], XSU_YMM_U32, 324 "ymm", i); 325 } 326 break; 327 case XSU_ZMM: 328 for (uint32_t i = 0; i < XSU_MAX_ZMM; i++) { 329 xsu_dump_vector(f, &fpu->xf_reg[i], XSU_ZMM_U32, 330 "zmm", i); 331 } 332 333 for (uint32_t i = 0; i < ARRAY_SIZE(fpu->xf_opmask); i++) { 334 (void) fprintf(f, "%%k%u 0x%016" PRIx64"\n", i, 335 fpu->xf_opmask[i]); 336 } 337 break; 338 } 339 } 340 341 typedef struct xsu_prx { 342 uint32_t xp_hwsup; 343 prxregset_xsave_t *xp_xsave; 344 prxregset_ymm_t *xp_ymm; 345 prxregset_opmask_t *xp_opmask; 346 prxregset_zmm_t *xp_zmm; 347 prxregset_hi_zmm_t *xp_hi_zmm; 348 } xsu_prx_t; 349 350 static void 351 xsu_fpu_to_xregs_xsave(xsu_prx_t *prx, const xsu_fpu_t *fpu) 352 { 353 prx->xp_xsave->prx_fx_fcw = FPU_CW_INIT; 354 prx->xp_xsave->prx_fx_mxcsr = SSE_MXCSR_INIT; 355 for (uint32_t i = 0; i < XSU_MAX_XMM; i++) { 356 (void) memcpy(&prx->xp_xsave->prx_fx_xmm[i], 357 &fpu->xf_reg[i]._l[0], XSU_XMM_U32 * sizeof (uint32_t)); 358 } 359 360 prx->xp_xsave->prx_xsh_xstate_bv = XFEATURE_LEGACY_FP | 361 XFEATURE_SSE; 362 if (prx->xp_hwsup >= XSU_YMM) { 363 prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX; 364 } 365 366 if (prx->xp_hwsup >= XSU_ZMM) { 367 prx->xp_xsave->prx_xsh_xstate_bv |= XFEATURE_AVX512; 368 } 369 } 370 371 static void 372 xsu_fpu_to_xregs_ymm(xsu_prx_t *prx, const xsu_fpu_t *fpu) 373 { 374 /* Copy the upper 128-bits to the YMM save area */ 375 for (uint32_t i = 0; i < XSU_MAX_YMM; i++) { 376 (void) memcpy(&prx->xp_ymm->prx_ymm[i], 377 &fpu->xf_reg[i]._l[XSU_XMM_U32], 378 XSU_XMM_U32 * sizeof (uint32_t)); 379 } 380 } 381 382 static void 383 xsu_fpu_to_xregs_zmm(xsu_prx_t *prx, const xsu_fpu_t *fpu) 384 { 385 /* The lower 16 regs are only 256-bit, the upper are 512-bit */ 386 for (uint32_t i = 0; i < MIN(XSU_MAX_ZMM, 16); i++) { 387 (void) memcpy(&prx->xp_zmm->prx_zmm[i], 388 &fpu->xf_reg[i]._l[XSU_YMM_U32], 389 XSU_YMM_U32 * sizeof (uint32_t)); 390 } 391 392 #ifdef __amd64 393 for (uint32_t i = 16; i < XSU_MAX_ZMM; i++) { 394 (void) memcpy(&prx->xp_hi_zmm->prx_hi_zmm[i - 16], 395 &fpu->xf_reg[i]._l[0], 396 XSU_ZMM_U32 * sizeof (uint32_t)); 397 } 398 #endif 399 400 (void) memcpy(prx->xp_opmask->prx_opmask, fpu->xf_opmask, 401 sizeof (prx->xp_opmask->prx_opmask)); 402 } 403 404 405 void 406 xsu_fpu_to_xregs(const xsu_fpu_t *fpu, uint32_t hwsup, prxregset_t **prxp, 407 size_t *sizep) 408 { 409 uint32_t ninfo = 1, curinfo; 410 size_t len = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) + 411 sizeof (prxregset_xsave_t); 412 prxregset_hdr_t *hdr; 413 uint32_t off; 414 xsu_prx_t prx; 415 416 if (hwsup != XSU_YMM && hwsup != XSU_ZMM) { 417 errx(EXIT_FAILURE, "given unknown xsu level: 0x%x", hwsup); 418 } 419 420 if (hwsup >= XSU_YMM) { 421 len += sizeof (prxregset_info_t) + sizeof (prxregset_ymm_t); 422 ninfo++; 423 } 424 425 if (hwsup >= XSU_ZMM) { 426 len += 3 * sizeof (prxregset_info_t) + 427 sizeof (prxregset_opmask_t) + sizeof (prxregset_zmm_t) + 428 sizeof (prxregset_hi_zmm_t); 429 ninfo += 3; 430 } 431 432 hdr = calloc(1, len); 433 if (hdr == NULL) { 434 err(EXIT_FAILURE, "failed to allocate prxregset_t (%zu bytes)", 435 len); 436 } 437 (void) memset(&prx, 0, sizeof (prx)); 438 prx.xp_hwsup = hwsup; 439 440 #ifdef __amd64 441 VERIFY3U(len, <=, UINT32_MAX); 442 #endif /* __amd64 */ 443 hdr->pr_type = PR_TYPE_XSAVE; 444 hdr->pr_size = (uint32_t)len; 445 hdr->pr_ninfo = ninfo; 446 447 curinfo = 0; 448 off = sizeof (prxregset_hdr_t) + sizeof (prxregset_info_t) * ninfo; 449 hdr->pr_info[curinfo].pri_type = PRX_INFO_XSAVE; 450 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_xsave_t); 451 hdr->pr_info[curinfo].pri_offset = off; 452 prx.xp_xsave = (void *)((uintptr_t)hdr + off); 453 off += sizeof (prxregset_xsave_t); 454 curinfo++; 455 456 if (hwsup >= XSU_YMM) { 457 hdr->pr_info[curinfo].pri_type = PRX_INFO_YMM; 458 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_ymm_t); 459 hdr->pr_info[curinfo].pri_offset = off; 460 prx.xp_ymm = (void *)((uintptr_t)hdr + off); 461 off += sizeof (prxregset_ymm_t); 462 curinfo++; 463 } 464 465 if (hwsup >= XSU_ZMM) { 466 hdr->pr_info[curinfo].pri_type = PRX_INFO_OPMASK; 467 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_opmask_t); 468 hdr->pr_info[curinfo].pri_offset = off; 469 prx.xp_opmask = (void *)((uintptr_t)hdr + off); 470 off += sizeof (prxregset_opmask_t); 471 curinfo++; 472 473 hdr->pr_info[curinfo].pri_type = PRX_INFO_ZMM; 474 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_zmm_t); 475 hdr->pr_info[curinfo].pri_offset = off; 476 prx.xp_zmm = (void *)((uintptr_t)hdr + off); 477 off += sizeof (prxregset_zmm_t); 478 curinfo++; 479 480 hdr->pr_info[curinfo].pri_type = PRX_INFO_HI_ZMM; 481 hdr->pr_info[curinfo].pri_size = sizeof (prxregset_hi_zmm_t); 482 hdr->pr_info[curinfo].pri_offset = off; 483 prx.xp_hi_zmm = (void *)((uintptr_t)hdr + off); 484 off += sizeof (prxregset_hi_zmm_t); 485 curinfo++; 486 } 487 488 xsu_fpu_to_xregs_xsave(&prx, fpu); 489 if (hwsup >= XSU_YMM) { 490 xsu_fpu_to_xregs_ymm(&prx, fpu); 491 } 492 493 if (hwsup >= XSU_ZMM) { 494 xsu_fpu_to_xregs_zmm(&prx, fpu); 495 } 496 497 *prxp = (prxregset_t *)hdr; 498 *sizep = len; 499 } 500 501 /* 502 * This pairs with xsu_proc_finish() below. The goal is to allow us to inject 503 * state after hitting a breakpoint, which is generally used right before 504 * something wants to print data. 505 */ 506 void 507 xsu_proc_bkpt(xsu_proc_t *xp) 508 { 509 int perr; 510 struct ps_prochandle *P; 511 char *const argv[3] = { xp->xp_prog, xp->xp_arg, NULL }; 512 GElf_Sym sym; 513 514 P = Pcreate(xp->xp_prog, argv, &perr, NULL, 0); 515 if (P == NULL) { 516 errx(EXIT_FAILURE, "failed to create %s: %s", xp->xp_prog, 517 Pcreate_error(perr)); 518 } 519 520 xp->xp_proc = P; 521 (void) Punsetflags(P, PR_RLC); 522 if (Psetflags(P, PR_KLC | PR_BPTADJ) != 0) { 523 int e = errno; 524 Prelease(P, PRELEASE_KILL); 525 errc(EXIT_FAILURE, e, "failed to set PR_KLC | PR_BPTADJ flags"); 526 } 527 528 if (Pxlookup_by_name(P, LM_ID_BASE, xp->xp_object, xp->xp_symname, &sym, 529 NULL) != 0) { 530 err(EXIT_FAILURE, "failed to find %s`%s", xp->xp_object, 531 xp->xp_symname); 532 } 533 534 if (Pfault(P, FLTBPT, 1) != 0) { 535 errx(EXIT_FAILURE, "failed to set the FLTBPT disposition"); 536 } 537 538 xp->xp_addr = sym.st_value; 539 if (Psetbkpt(P, sym.st_value, &xp->xp_instr) != 0) { 540 err(EXIT_FAILURE, "failed to set breakpoint on xsu_getfpu " 541 "(0x%" PRIx64 ")", sym.st_value); 542 } 543 544 if (Psetrun(P, 0, 0) != 0) { 545 err(EXIT_FAILURE, "failed to resume running our target"); 546 } 547 548 if (Pwait(P, xsu_proc_timeout) != 0) { 549 err(EXIT_FAILURE, "%s did not hit our expected breakpoint", 550 argv[1]); 551 } 552 } 553 554 /* 555 * Run a process to completion and get its wait exit status. 556 */ 557 void 558 xsu_proc_finish(xsu_proc_t *xp) 559 { 560 pid_t pid = Ppsinfo(xp->xp_proc)->pr_pid; 561 562 if (Pdelbkpt(xp->xp_proc, xp->xp_addr, xp->xp_instr) != 0) { 563 err(EXIT_FAILURE, "failed to delete %s`%s() breakpoint", 564 xp->xp_object, xp->xp_symname); 565 } 566 567 if (Psetrun(xp->xp_proc, 0, PRCFAULT) != 0) { 568 err(EXIT_FAILURE, "failed to resume running our target"); 569 } 570 571 if (waitpid(pid, &xp->xp_wait, 0) != pid) { 572 err(EXIT_FAILURE, "failed to get our child processes's (%" 573 _PRIdID "), wait info", pid); 574 } 575 576 if (WIFEXITED(xp->xp_wait) == 0) { 577 errx(EXIT_FAILURE, "our child process didn't actually exit!"); 578 } 579 580 Pfree(xp->xp_proc); 581 xp->xp_proc = NULL; 582 } 583 584 void 585 xsu_fpregset_xmm_set(fpregset_t *fpr, uint32_t seed) 586 { 587 size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm); 588 for (uint32_t i = 0; i < nregs; i++) { 589 upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i]; 590 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) { 591 u128->_l[u32] = seed; 592 } 593 } 594 } 595 596 void 597 xsu_xregs_xmm_set(prxregset_t *prx, uint32_t seed) 598 { 599 prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx; 600 prxregset_xsave_t *xsave = NULL; 601 602 for (uint32_t i = 0; i < hdr->pr_ninfo; i++) { 603 if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) { 604 xsave = (void *)((uintptr_t)prx + 605 hdr->pr_info[i].pri_offset); 606 break; 607 } 608 } 609 610 if (xsave == NULL) { 611 errx(EXIT_FAILURE, "asked to set xsave %%xmm regs, but no " 612 "xsave info present"); 613 } 614 615 size_t nregs = ARRAY_SIZE(xsave->prx_fx_xmm); 616 for (uint32_t i = 0; i < nregs; i++) { 617 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++, seed++) { 618 xsave->prx_fx_xmm[i]._l[u32] = seed; 619 } 620 } 621 } 622 623 static const prxregset_info_t * 624 xsu_xregs_find_comp(const prxregset_hdr_t *hdr, uint32_t comp, uintptr_t *datap) 625 { 626 for (uint32_t i = 0; i < hdr->pr_ninfo; i++) { 627 if (hdr->pr_info[i].pri_type == comp) { 628 *datap = (uintptr_t)hdr + hdr->pr_info[i].pri_offset; 629 return (&hdr->pr_info[i]); 630 } 631 } 632 633 return (NULL); 634 } 635 636 boolean_t 637 xsu_xregs_comp_equal(const prxregset_t *src, const prxregset_t *dest, 638 uint32_t comp) 639 { 640 const prxregset_hdr_t *shdr = (prxregset_hdr_t *)src; 641 const prxregset_hdr_t *dhdr = (prxregset_hdr_t *)dest; 642 const prxregset_info_t *sinfo = NULL, *dinfo = NULL; 643 uintptr_t sdata, ddata; 644 645 sinfo = xsu_xregs_find_comp(shdr, comp, &sdata); 646 if (sinfo == NULL) { 647 warnx("source xregs missing component %u", comp); 648 return (B_FALSE); 649 } 650 651 dinfo = xsu_xregs_find_comp(dhdr, comp, &ddata); 652 if (dinfo == NULL) { 653 warnx("destination xregs missing component %u", comp); 654 return (B_FALSE); 655 } 656 657 if (sinfo->pri_size != dinfo->pri_size) { 658 warnx("source xregs length 0x%x does not match dest xregs 0x%x", 659 sinfo->pri_size, dinfo->pri_size); 660 } 661 662 if (bcmp((void *)sdata, (void *)ddata, sinfo->pri_size) != 0) { 663 warnx("component data differs: dumping!"); 664 for (uint32_t i = 0; i < sinfo->pri_offset; i++) { 665 const uint8_t *su8 = (uint8_t *)sdata; 666 const uint8_t *du8 = (uint8_t *)ddata; 667 668 if (su8[i] != du8[i]) { 669 (void) fprintf(stderr, 670 "src[%u] = 0x%2x\tdst[%u] = 0x%x\n", 671 i, su8[i], i, du8[i]); 672 } 673 } 674 675 return (B_FALSE); 676 } 677 678 return (B_TRUE); 679 } 680 681 boolean_t 682 xsu_fpregs_cmp(const fpregset_t *fpr, const prxregset_t *prx) 683 { 684 boolean_t valid = B_TRUE; 685 const prxregset_hdr_t *hdr = (prxregset_hdr_t *)prx; 686 const prxregset_xsave_t *xsave = NULL; 687 uint16_t fpr_cw, fpr_sw; 688 689 for (uint32_t i = 0; i < hdr->pr_ninfo; i++) { 690 if (hdr->pr_info[i].pri_type == PRX_INFO_XSAVE) { 691 xsave = (void *)((uintptr_t)prx + 692 hdr->pr_info[i].pri_offset); 693 break; 694 } 695 } 696 697 if (xsave == NULL) { 698 warnx("xregs missing xsave component for fpregs comparison"); 699 return (B_FALSE); 700 } 701 702 /* 703 * First check the XMM registers because those don't require ifdefs, 704 * thankfully. 705 */ 706 size_t nregs = ARRAY_SIZE(fpr->fp_reg_set.fpchip_state.xmm); 707 for (size_t i = 0; i < nregs; i++) { 708 const upad128_t *u128 = &fpr->fp_reg_set.fpchip_state.xmm[i]; 709 for (uint32_t u32 = 0; u32 < XSU_XMM_U32; u32++) { 710 if (u128->_l[u32] != xsave->prx_fx_xmm[i]._l[u32]) { 711 valid = B_FALSE; 712 (void) fprintf(stderr, "fpregset xmm[%u] " 713 "u32[%u] does not match xsave, fpregset: " 714 "0x%x, xsave: 0x%x\n", i, u32, 715 u128->_l[u32], 716 xsave->prx_fx_xmm[i]._l[u32]); 717 } 718 } 719 } 720 721 if (xsave->prx_fx_mxcsr != fpr->fp_reg_set.fpchip_state.mxcsr) { 722 valid = B_FALSE; 723 (void) fprintf(stderr, "mxcsr mismatched: fpregset: 0x%x, " 724 "xsave: 0x%x\n", fpr->fp_reg_set.fpchip_state.mxcsr, 725 xsave->prx_fx_mxcsr); 726 } 727 728 /* 729 * Extract the basic x87 state. This requires ifdefs because the 32-bit 730 * ABI here is a bit, particular. The 32-bit fpregs is the mcontext_t 731 * struct which is mostly opaque and we need to use the ieeefp.h types 732 * which are only visible for ILP32. It also treats 16-bit values as 733 * 32-bit ones, hence masking below. 734 */ 735 #ifdef __amd64 736 fpr_cw = fpr->fp_reg_set.fpchip_state.cw; 737 fpr_sw = fpr->fp_reg_set.fpchip_state.sw; 738 #else /* !__amd64 (__i386) */ 739 struct _fpstate fps; 740 741 (void) memcpy(&fps, &fpr->fp_reg_set.fpchip_state, sizeof (fps)); 742 fpr_cw = fps.cw & 0xffff; 743 fpr_sw = fps.sw & 0xffff; 744 #endif /* __amd64 */ 745 746 if (fpr_cw != xsave->prx_fx_fcw) { 747 valid = B_FALSE; 748 (void) fprintf(stderr, "x87 cw mismatched: fpregset: 0x%x, " 749 "xsave: 0x%x\n", fpr_cw, xsave->prx_fx_fcw); 750 } 751 752 if (fpr_sw != xsave->prx_fx_fsw) { 753 valid = B_FALSE; 754 (void) fprintf(stderr, "x87 sw mismatched: fpregset: 0x%x, " 755 "xsave: 0x%x\n", fpr_sw, xsave->prx_fx_fsw); 756 } 757 758 return (valid); 759 } 760 761 void 762 xsu_ustack_alloc(ucontext_t *ctx) 763 { 764 static void *stack = NULL; 765 static size_t size = 0; 766 767 if (size == 0) { 768 long sys = sysconf(_SC_THREAD_STACK_MIN); 769 if (sys == -1) { 770 err(EXIT_FAILURE, "failed to get minimum stack size"); 771 } 772 size = (size_t)sys; 773 774 stack = calloc(size, sizeof (uint8_t)); 775 if (stack == NULL) { 776 err(EXIT_FAILURE, "failed to allocate stack buffer"); 777 } 778 } 779 780 ctx->uc_stack.ss_size = size; 781 ctx->uc_stack.ss_sp = stack; 782 ctx->uc_stack.ss_flags = 0; 783 } 784