xref: /linux/tools/testing/selftests/x86/amx.c (revision eb01fe7abbe2d0b38824d2a93fdb4cc3eaf2ccc1)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <err.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <setjmp.h>
8 #include <stdio.h>
9 #include <string.h>
10 #include <stdbool.h>
11 #include <unistd.h>
12 #include <x86intrin.h>
13 
14 #include <sys/auxv.h>
15 #include <sys/mman.h>
16 #include <sys/shm.h>
17 #include <sys/ptrace.h>
18 #include <sys/syscall.h>
19 #include <sys/wait.h>
20 #include <sys/uio.h>
21 
22 #include "../kselftest.h" /* For __cpuid_count() */
23 
24 #ifndef __x86_64__
25 # error This test is 64-bit only
26 #endif
27 
/* The XSAVE header starts right after the 512-byte legacy region. */
#define XSAVE_HDR_OFFSET	512
/* Size in bytes of the XSAVE header. */
#define XSAVE_HDR_SIZE		64

/*
 * Byte-level view of an XSAVE area.
 *
 * The anonymous union overlays two ways of addressing the same memory:
 * the structured layout (legacy region, header, then the extended
 * region) and 'bytes', which lets callers index the area by a raw
 * offset.  'extended' and 'bytes' are zero-length arrays, so
 * sizeof(struct xsave_buffer) only covers the legacy region and the
 * header; the real buffer is allocated separately with the full size.
 */
struct xsave_buffer {
	union {
		struct {
			char legacy[XSAVE_HDR_OFFSET];
			char header[XSAVE_HDR_SIZE];
			char extended[0];
		};
		char bytes[0];
	};
};
41 
42 static inline uint64_t xgetbv(uint32_t index)
43 {
44 	uint32_t eax, edx;
45 
46 	asm volatile("xgetbv;"
47 		     : "=a" (eax), "=d" (edx)
48 		     : "c" (index));
49 	return eax + ((uint64_t)edx << 32);
50 }
51 
52 static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
53 {
54 	uint32_t rfbm_lo = rfbm;
55 	uint32_t rfbm_hi = rfbm >> 32;
56 
57 	asm volatile("xsave (%%rdi)"
58 		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
59 		     : "memory");
60 }
61 
62 static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
63 {
64 	uint32_t rfbm_lo = rfbm;
65 	uint32_t rfbm_hi = rfbm >> 32;
66 
67 	asm volatile("xrstor (%%rdi)"
68 		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
69 }
70 
71 /* err() exits and will not return */
72 #define fatal_error(msg, ...)	err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
73 
74 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
75 		       int flags)
76 {
77 	struct sigaction sa;
78 
79 	memset(&sa, 0, sizeof(sa));
80 	sa.sa_sigaction = handler;
81 	sa.sa_flags = SA_SIGINFO | flags;
82 	sigemptyset(&sa.sa_mask);
83 	if (sigaction(sig, &sa, 0))
84 		fatal_error("sigaction");
85 }
86 
87 static void clearhandler(int sig)
88 {
89 	struct sigaction sa;
90 
91 	memset(&sa, 0, sizeof(sa));
92 	sa.sa_handler = SIG_DFL;
93 	sigemptyset(&sa.sa_mask);
94 	if (sigaction(sig, &sa, 0))
95 		fatal_error("sigaction");
96 }
97 
#define XFEATURE_XTILECFG	17
#define XFEATURE_XTILEDATA	18
#define XFEATURE_MASK_XTILECFG	(1 << XFEATURE_XTILECFG)
#define XFEATURE_MASK_XTILEDATA	(1 << XFEATURE_XTILEDATA)
#define XFEATURE_MASK_XTILE	(XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)

#define CPUID_LEAF1_ECX_XSAVE_MASK	(1 << 26)
#define CPUID_LEAF1_ECX_OSXSAVE_MASK	(1 << 27)
/*
 * Bail out unless CPUID leaf 1 reports both the XSAVE feature set
 * (ECX bit 26, which also covers XGETBV) and the OSXSAVE bit
 * (ECX bit 27).
 */
static inline void check_cpuid_xsave(void)
{
	uint32_t a, b, c, d;
	bool cpu_has_xsave, os_has_xsave;

	__cpuid_count(1, 0, a, b, c, d);

	cpu_has_xsave = (c & CPUID_LEAF1_ECX_XSAVE_MASK) != 0;
	os_has_xsave = (c & CPUID_LEAF1_ECX_OSXSAVE_MASK) != 0;

	/* Same order as before: CPU support is reported first. */
	if (!cpu_has_xsave)
		fatal_error("cpuid: no CPU xsave support");
	if (!os_has_xsave)
		fatal_error("cpuid: no OS xsave support");
}
121 
122 static uint32_t xbuf_size;
123 
124 static struct {
125 	uint32_t xbuf_offset;
126 	uint32_t size;
127 } xtiledata;
128 
129 #define CPUID_LEAF_XSTATE		0xd
130 #define CPUID_SUBLEAF_XSTATE_USER	0x0
131 #define TILE_CPUID			0x1d
132 #define TILE_PALETTE_ID			0x1
133 
134 static void check_cpuid_xtiledata(void)
135 {
136 	uint32_t eax, ebx, ecx, edx;
137 
138 	__cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
139 		      eax, ebx, ecx, edx);
140 
141 	/*
142 	 * EBX enumerates the size (in bytes) required by the XSAVE
143 	 * instruction for an XSAVE area containing all the user state
144 	 * components corresponding to bits currently set in XCR0.
145 	 *
146 	 * Stash that off so it can be used to allocate buffers later.
147 	 */
148 	xbuf_size = ebx;
149 
150 	__cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
151 		      eax, ebx, ecx, edx);
152 	/*
153 	 * eax: XTILEDATA state component size
154 	 * ebx: XTILEDATA state component offset in user buffer
155 	 */
156 	if (!eax || !ebx)
157 		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
158 				eax, ebx);
159 
160 	xtiledata.size	      = eax;
161 	xtiledata.xbuf_offset = ebx;
162 }
163 
164 /* The helpers for managing XSAVE buffer and tile states: */
165 
166 struct xsave_buffer *alloc_xbuf(void)
167 {
168 	struct xsave_buffer *xbuf;
169 
170 	/* XSAVE buffer should be 64B-aligned. */
171 	xbuf = aligned_alloc(64, xbuf_size);
172 	if (!xbuf)
173 		fatal_error("aligned_alloc()");
174 	return xbuf;
175 }
176 
177 static inline void clear_xstate_header(struct xsave_buffer *buffer)
178 {
179 	memset(&buffer->header, 0, sizeof(buffer->header));
180 }
181 
182 static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
183 {
184 	/* XSTATE_BV is at the beginning of the header: */
185 	return *(uint64_t *)&buffer->header;
186 }
187 
188 static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
189 {
190 	/* XSTATE_BV is at the beginning of the header: */
191 	*(uint64_t *)(&buffer->header) = bv;
192 }
193 
194 static void set_rand_tiledata(struct xsave_buffer *xbuf)
195 {
196 	int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
197 	int data;
198 	int i;
199 
200 	/*
201 	 * Ensure that 'data' is never 0.  This ensures that
202 	 * the registers are never in their initial configuration
203 	 * and thus never tracked as being in the init state.
204 	 */
205 	data = rand() | 1;
206 
207 	for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
208 		*ptr = data;
209 }
210 
/* Shared scratch XSAVE buffer used by the tests in this file. */
struct xsave_buffer *stashed_xsave;

/* Allocate stashed_xsave and start it off with a cleared XSAVE header. */
static void init_stashed_xsave(void)
{
	struct xsave_buffer *fresh = alloc_xbuf();

	stashed_xsave = fresh;
	if (fresh == NULL)
		fatal_error("failed to allocate stashed_xsave\n");

	clear_xstate_header(fresh);
}
220 
221 static void free_stashed_xsave(void)
222 {
223 	free(stashed_xsave);
224 }
225 
/* See 'struct _fpx_sw_bytes' at sigcontext.h */
#define SW_BYTES_OFFSET		464
/* N.B. The struct's field name varies so read from the offset. */
#define SW_BYTES_BV_OFFSET	(SW_BYTES_OFFSET + 8)

/*
 * Return a pointer to the software-reserved bytes located
 * SW_BYTES_OFFSET bytes into the signal frame's FP state @buffer.
 */
static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
{
	/* Arithmetic on 'void *' is a GNU extension; step in bytes instead. */
	return (struct _fpx_sw_bytes *)((char *)buffer + SW_BYTES_OFFSET);
}

/*
 * Return the 64-bit feature mask stored SW_BYTES_BV_OFFSET bytes into
 * @buffer.
 */
static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
{
	uint64_t features;

	/* Copy the value out instead of a type-punned pointer load. */
	memcpy(&features, (char *)buffer + SW_BYTES_BV_OFFSET,
	       sizeof(features));
	return features;
}
240 
/* Work around printf() being unsafe in signals: */
#define SIGNAL_BUF_LEN 1000
char signal_message_buffer[SIGNAL_BUF_LEN];
/*
 * Append @msg to signal_message_buffer, truncating it so the buffer
 * always stays NUL-terminated.  The accumulated text is printed later
 * from normal (non-signal) context.
 *
 * @msg is only read, so it is const-qualified; callers pass string
 * literals.
 */
void sig_print(const char *msg)
{
	size_t used = strlen(signal_message_buffer);
	/* Room left for characters, keeping one byte for the NUL. */
	size_t room = SIGNAL_BUF_LEN - used - 1;

	strncat(signal_message_buffer, msg, room);
}
250 
251 static volatile bool noperm_signaled;
252 static int noperm_errs;
253 /*
254  * Signal handler for when AMX is used but
255  * permission has not been obtained.
256  */
257 static void handle_noperm(int sig, siginfo_t *si, void *ctx_void)
258 {
259 	ucontext_t *ctx = (ucontext_t *)ctx_void;
260 	void *xbuf = ctx->uc_mcontext.fpregs;
261 	struct _fpx_sw_bytes *sw_bytes;
262 	uint64_t features;
263 
264 	/* Reset the signal message buffer: */
265 	signal_message_buffer[0] = '\0';
266 	sig_print("\tAt SIGILL handler,\n");
267 
268 	if (si->si_code != ILL_ILLOPC) {
269 		noperm_errs++;
270 		sig_print("[FAIL]\tInvalid signal code.\n");
271 	} else {
272 		sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n");
273 	}
274 
275 	sw_bytes = get_fpx_sw_bytes(xbuf);
276 	/*
277 	 * Without permission, the signal XSAVE buffer should not
278 	 * have room for AMX register state (aka. xtiledata).
279 	 * Check that the size does not overlap with where xtiledata
280 	 * will reside.
281 	 *
282 	 * This also implies that no state components *PAST*
283 	 * XTILEDATA (features >=19) can be present in the buffer.
284 	 */
285 	if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) {
286 		sig_print("[OK]\tValid xstate size\n");
287 	} else {
288 		noperm_errs++;
289 		sig_print("[FAIL]\tInvalid xstate size\n");
290 	}
291 
292 	features = get_fpx_sw_bytes_features(xbuf);
293 	/*
294 	 * Without permission, the XTILEDATA feature
295 	 * bit should not be set.
296 	 */
297 	if ((features & XFEATURE_MASK_XTILEDATA) == 0) {
298 		sig_print("[OK]\tValid xstate mask\n");
299 	} else {
300 		noperm_errs++;
301 		sig_print("[FAIL]\tInvalid xstate mask\n");
302 	}
303 
304 	noperm_signaled = true;
305 	ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */
306 }
307 
308 /* Return true if XRSTOR is successful; otherwise, false. */
309 static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask)
310 {
311 	noperm_signaled = false;
312 	xrstor(xbuf, mask);
313 
314 	/* Print any messages produced by the signal code: */
315 	printf("%s", signal_message_buffer);
316 	/*
317 	 * Reset the buffer to make sure any future printing
318 	 * only outputs new messages:
319 	 */
320 	signal_message_buffer[0] = '\0';
321 
322 	if (noperm_errs)
323 		fatal_error("saw %d errors in noperm signal handler\n", noperm_errs);
324 
325 	return !noperm_signaled;
326 }
327 
328 /*
329  * Use XRSTOR to populate the XTILEDATA registers with
330  * random data.
331  *
332  * Return true if successful; otherwise, false.
333  */
334 static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
335 {
336 	clear_xstate_header(xbuf);
337 	set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
338 	set_rand_tiledata(xbuf);
339 	return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
340 }
341 
342 /* Return XTILEDATA to its initial configuration. */
343 static inline void init_xtiledata(void)
344 {
345 	clear_xstate_header(stashed_xsave);
346 	xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
347 }
348 
349 enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
350 
351 /* arch_prctl() and sigaltstack() test */
352 
353 #define ARCH_GET_XCOMP_PERM	0x1022
354 #define ARCH_REQ_XCOMP_PERM	0x1023
355 
356 static void req_xtiledata_perm(void)
357 {
358 	syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
359 }
360 
361 static void validate_req_xcomp_perm(enum expected_result exp)
362 {
363 	unsigned long bitmask, expected_bitmask;
364 	long rc;
365 
366 	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
367 	if (rc) {
368 		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
369 	} else if (!(bitmask & XFEATURE_MASK_XTILECFG)) {
370 		fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off.");
371 	}
372 
373 	rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
374 	if (exp == FAIL_EXPECTED) {
375 		if (rc) {
376 			printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n");
377 			return;
378 		}
379 
380 		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n");
381 	} else if (rc) {
382 		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
383 	}
384 
385 	expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA;
386 
387 	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
388 	if (rc) {
389 		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
390 	} else if (bitmask != expected_bitmask) {
391 		fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n",
392 			    bitmask, expected_bitmask);
393 	} else {
394 		printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
395 	}
396 }
397 
398 static void validate_xcomp_perm(enum expected_result exp)
399 {
400 	bool load_success = load_rand_tiledata(stashed_xsave);
401 
402 	if (exp == FAIL_EXPECTED) {
403 		if (load_success) {
404 			noperm_errs++;
405 			printf("[FAIL]\tLoad tiledata succeeded.\n");
406 		} else {
407 			printf("[OK]\tLoad tiledata failed.\n");
408 		}
409 	} else if (exp == SUCCESS_EXPECTED) {
410 		if (load_success) {
411 			printf("[OK]\tLoad tiledata succeeded.\n");
412 		} else {
413 			noperm_errs++;
414 			printf("[FAIL]\tLoad tiledata failed.\n");
415 		}
416 	}
417 }
418 
419 #ifndef AT_MINSIGSTKSZ
420 #  define AT_MINSIGSTKSZ	51
421 #endif
422 
423 static void *alloc_altstack(unsigned int size)
424 {
425 	void *altstack;
426 
427 	altstack = mmap(NULL, size, PROT_READ | PROT_WRITE,
428 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
429 
430 	if (altstack == MAP_FAILED)
431 		fatal_error("mmap() for altstack");
432 
433 	return altstack;
434 }
435 
436 static void setup_altstack(void *addr, unsigned long size, enum expected_result exp)
437 {
438 	stack_t ss;
439 	int rc;
440 
441 	memset(&ss, 0, sizeof(ss));
442 	ss.ss_size = size;
443 	ss.ss_sp = addr;
444 
445 	rc = sigaltstack(&ss, NULL);
446 
447 	if (exp == FAIL_EXPECTED) {
448 		if (rc) {
449 			printf("[OK]\tsigaltstack() failed.\n");
450 		} else {
451 			fatal_error("sigaltstack() succeeded unexpectedly.\n");
452 		}
453 	} else if (rc) {
454 		fatal_error("sigaltstack()");
455 	}
456 }
457 
458 static void test_dynamic_sigaltstack(void)
459 {
460 	unsigned int small_size, enough_size;
461 	unsigned long minsigstksz;
462 	void *altstack;
463 
464 	minsigstksz = getauxval(AT_MINSIGSTKSZ);
465 	printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz);
466 	/*
467 	 * getauxval() itself can return 0 for failure or
468 	 * success.  But, in this case, AT_MINSIGSTKSZ
469 	 * will always return a >=0 value if implemented.
470 	 * Just check for 0.
471 	 */
472 	if (minsigstksz == 0) {
473 		printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n");
474 		return;
475 	}
476 
477 	enough_size = minsigstksz * 2;
478 
479 	altstack = alloc_altstack(enough_size);
480 	printf("\tAllocate memory for altstack (%u bytes).\n", enough_size);
481 
482 	/*
483 	 * Try setup_altstack() with a size which can not fit
484 	 * XTILEDATA.  ARCH_REQ_XCOMP_PERM should fail.
485 	 */
486 	small_size = minsigstksz - xtiledata.size;
487 	printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size);
488 	setup_altstack(altstack, small_size, SUCCESS_EXPECTED);
489 	validate_req_xcomp_perm(FAIL_EXPECTED);
490 
491 	/*
492 	 * Try setup_altstack() with a size derived from
493 	 * AT_MINSIGSTKSZ.  It should be more than large enough
494 	 * and thus ARCH_REQ_XCOMP_PERM should succeed.
495 	 */
496 	printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size);
497 	setup_altstack(altstack, enough_size, SUCCESS_EXPECTED);
498 	validate_req_xcomp_perm(SUCCESS_EXPECTED);
499 
500 	/*
501 	 * Try to coerce setup_altstack() to again accept a
502 	 * too-small altstack.  This ensures that big-enough
503 	 * sigaltstacks can not shrink to a too-small value
504 	 * once XTILEDATA permission is established.
505 	 */
506 	printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size);
507 	setup_altstack(altstack, small_size, FAIL_EXPECTED);
508 }
509 
510 static void test_dynamic_state(void)
511 {
512 	pid_t parent, child, grandchild;
513 
514 	parent = fork();
515 	if (parent < 0) {
516 		/* fork() failed */
517 		fatal_error("fork");
518 	} else if (parent > 0) {
519 		int status;
520 		/* fork() succeeded.  Now in the parent. */
521 
522 		wait(&status);
523 		if (!WIFEXITED(status) || WEXITSTATUS(status))
524 			fatal_error("arch_prctl test parent exit");
525 		return;
526 	}
527 	/* fork() succeeded.  Now in the child . */
528 
529 	printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n");
530 
531 	printf("\tFork a child.\n");
532 	child = fork();
533 	if (child < 0) {
534 		fatal_error("fork");
535 	} else if (child > 0) {
536 		int status;
537 
538 		wait(&status);
539 		if (!WIFEXITED(status) || WEXITSTATUS(status))
540 			fatal_error("arch_prctl test child exit");
541 		_exit(0);
542 	}
543 
544 	/*
545 	 * The permission request should fail without an
546 	 * XTILEDATA-compatible signal stack
547 	 */
548 	printf("\tTest XCOMP_PERM at child.\n");
549 	validate_xcomp_perm(FAIL_EXPECTED);
550 
551 	/*
552 	 * Set up an XTILEDATA-compatible signal stack and
553 	 * also obtain permission to populate XTILEDATA.
554 	 */
555 	printf("\tTest dynamic sigaltstack at child:\n");
556 	test_dynamic_sigaltstack();
557 
558 	/* Ensure that XTILEDATA can be populated. */
559 	printf("\tTest XCOMP_PERM again at child.\n");
560 	validate_xcomp_perm(SUCCESS_EXPECTED);
561 
562 	printf("\tFork a grandchild.\n");
563 	grandchild = fork();
564 	if (grandchild < 0) {
565 		/* fork() failed */
566 		fatal_error("fork");
567 	} else if (!grandchild) {
568 		/* fork() succeeded.  Now in the (grand)child. */
569 		printf("\tTest XCOMP_PERM at grandchild.\n");
570 
571 		/*
572 		 * Ensure that the grandchild inherited
573 		 * permission and a compatible sigaltstack:
574 		 */
575 		validate_xcomp_perm(SUCCESS_EXPECTED);
576 	} else {
577 		int status;
578 		/* fork() succeeded.  Now in the parent. */
579 
580 		wait(&status);
581 		if (!WIFEXITED(status) || WEXITSTATUS(status))
582 			fatal_error("fork test grandchild");
583 	}
584 
585 	_exit(0);
586 }
587 
588 static inline int __compare_tiledata_state(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2)
589 {
590 	return memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
591 		      &xbuf2->bytes[xtiledata.xbuf_offset],
592 		      xtiledata.size);
593 }
594 
595 /*
596  * Save current register state and compare it to @xbuf1.'
597  *
598  * Returns false if @xbuf1 matches the registers.
599  * Returns true  if @xbuf1 differs from the registers.
600  */
601 static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
602 {
603 	struct xsave_buffer *xbuf2;
604 	int ret;
605 
606 	xbuf2 = alloc_xbuf();
607 	if (!xbuf2)
608 		fatal_error("failed to allocate XSAVE buffer\n");
609 
610 	xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
611 	ret = __compare_tiledata_state(xbuf1, xbuf2);
612 
613 	free(xbuf2);
614 
615 	if (ret == 0)
616 		return false;
617 	return true;
618 }
619 
/* Exit via fatal_error() unless the tile registers still match @xbuf. */
static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
{
	if (__validate_tiledata_regs(xbuf))
		fatal_error("TILEDATA registers changed");
}
627 
/* Exit via fatal_error() if the tile registers still match @xbuf. */
static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
{
	if (!__validate_tiledata_regs(xbuf))
		fatal_error("TILEDATA registers did not change");
}
635 
636 /* tiledata inheritance test */
637 
638 static void test_fork(void)
639 {
640 	pid_t child, grandchild;
641 
642 	child = fork();
643 	if (child < 0) {
644 		/* fork() failed */
645 		fatal_error("fork");
646 	} else if (child > 0) {
647 		/* fork() succeeded.  Now in the parent. */
648 		int status;
649 
650 		wait(&status);
651 		if (!WIFEXITED(status) || WEXITSTATUS(status))
652 			fatal_error("fork test child");
653 		return;
654 	}
655 	/* fork() succeeded.  Now in the child. */
656 	printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n");
657 
658 	load_rand_tiledata(stashed_xsave);
659 
660 	grandchild = fork();
661 	if (grandchild < 0) {
662 		/* fork() failed */
663 		fatal_error("fork");
664 	} else if (grandchild > 0) {
665 		/* fork() succeeded.  Still in the first child. */
666 		int status;
667 
668 		wait(&status);
669 		if (!WIFEXITED(status) || WEXITSTATUS(status))
670 			fatal_error("fork test grand child");
671 		_exit(0);
672 	}
673 	/* fork() succeeded.  Now in the (grand)child. */
674 
675 	/*
676 	 * TILEDATA registers are not preserved across fork().
677 	 * Ensure that their value has changed:
678 	 */
679 	validate_tiledata_regs_changed(stashed_xsave);
680 
681 	_exit(0);
682 }
683 
684 /* Context switching test */
685 
/*
 * Parameters of the context switching test: how many rounds each
 * thread runs and how many threads take part.  Set by main() before
 * test_context_switch() is called.
 */
static struct _ctxtswtest_cfg {
	unsigned int iterations;
	unsigned int num_threads;
} ctxtswtest_config;
690 
/*
 * Per-thread bookkeeping for the context switching test.  The entries
 * form a ring through 'next'.
 */
struct futex_info {
	pthread_t thread;		/* handle filled in by pthread_create() */
	int nr;				/* index of this entry in the array */
	pthread_mutex_t mutex;		/* the thread blocks on this until woken */
	struct futex_info *next;	/* entry whose mutex this thread unlocks */
};
697 
698 static void *check_tiledata(void *info)
699 {
700 	struct futex_info *finfo = (struct futex_info *)info;
701 	struct xsave_buffer *xbuf;
702 	int i;
703 
704 	xbuf = alloc_xbuf();
705 	if (!xbuf)
706 		fatal_error("unable to allocate XSAVE buffer");
707 
708 	/*
709 	 * Load random data into 'xbuf' and then restore
710 	 * it to the tile registers themselves.
711 	 */
712 	load_rand_tiledata(xbuf);
713 	for (i = 0; i < ctxtswtest_config.iterations; i++) {
714 		pthread_mutex_lock(&finfo->mutex);
715 
716 		/*
717 		 * Ensure the register values have not
718 		 * diverged from those recorded in 'xbuf'.
719 		 */
720 		validate_tiledata_regs_same(xbuf);
721 
722 		/* Load new, random values into xbuf and registers */
723 		load_rand_tiledata(xbuf);
724 
725 		/*
726 		 * The last thread's last unlock will be for
727 		 * thread 0's mutex.  However, thread 0 will
728 		 * have already exited the loop and the mutex
729 		 * will already be unlocked.
730 		 *
731 		 * Because this is not an ERRORCHECK mutex,
732 		 * that inconsistency will be silently ignored.
733 		 */
734 		pthread_mutex_unlock(&finfo->next->mutex);
735 	}
736 
737 	free(xbuf);
738 	/*
739 	 * Return this thread's finfo, which is
740 	 * a unique value for this thread.
741 	 */
742 	return finfo;
743 }
744 
745 static int create_threads(int num, struct futex_info *finfo)
746 {
747 	int i;
748 
749 	for (i = 0; i < num; i++) {
750 		int next_nr;
751 
752 		finfo[i].nr = i;
753 		/*
754 		 * Thread 'i' will wait on this mutex to
755 		 * be unlocked.  Lock it immediately after
756 		 * initialization:
757 		 */
758 		pthread_mutex_init(&finfo[i].mutex, NULL);
759 		pthread_mutex_lock(&finfo[i].mutex);
760 
761 		next_nr = (i + 1) % num;
762 		finfo[i].next = &finfo[next_nr];
763 
764 		if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
765 			fatal_error("pthread_create()");
766 	}
767 	return 0;
768 }
769 
/*
 * Restrict the calling thread to CPU 0.
 * Exits via fatal_error() if sched_setaffinity() fails.
 */
static void affinitize_cpu0(void)
{
	cpu_set_t only_cpu0;
	int rc;

	CPU_ZERO(&only_cpu0);
	CPU_SET(0, &only_cpu0);

	rc = sched_setaffinity(0, sizeof(only_cpu0), &only_cpu0);
	if (rc != 0)
		fatal_error("sched_setaffinity to CPU 0");
}
780 
781 static void test_context_switch(void)
782 {
783 	struct futex_info *finfo;
784 	int i;
785 
786 	/* Affinitize to one CPU to force context switches */
787 	affinitize_cpu0();
788 
789 	req_xtiledata_perm();
790 
791 	printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
792 	       ctxtswtest_config.iterations,
793 	       ctxtswtest_config.num_threads);
794 
795 
796 	finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
797 	if (!finfo)
798 		fatal_error("malloc()");
799 
800 	create_threads(ctxtswtest_config.num_threads, finfo);
801 
802 	/*
803 	 * This thread wakes up thread 0
804 	 * Thread 0 will wake up 1
805 	 * Thread 1 will wake up 2
806 	 * ...
807 	 * the last thread will wake up 0
808 	 *
809 	 * ... this will repeat for the configured
810 	 * number of iterations.
811 	 */
812 	pthread_mutex_unlock(&finfo[0].mutex);
813 
814 	/* Wait for all the threads to finish: */
815 	for (i = 0; i < ctxtswtest_config.num_threads; i++) {
816 		void *thread_retval;
817 		int rc;
818 
819 		rc = pthread_join(finfo[i].thread, &thread_retval);
820 
821 		if (rc)
822 			fatal_error("pthread_join() failed for thread %d err: %d\n",
823 					i, rc);
824 
825 		if (thread_retval != &finfo[i])
826 			fatal_error("unexpected thread retval for thread %d: %p\n",
827 					i, thread_retval);
828 
829 	}
830 
831 	printf("[OK]\tNo incorrect case was found.\n");
832 
833 	free(finfo);
834 }
835 
836 /* Ptrace test */
837 
838 /*
839  * Make sure the ptracee has the expanded kernel buffer on the first
840  * use. Then, initialize the state before performing the state
841  * injection from the ptracer.
842  */
843 static inline void ptracee_firstuse_tiledata(void)
844 {
845 	load_rand_tiledata(stashed_xsave);
846 	init_xtiledata();
847 }
848 
849 /*
850  * Ptracer injects the randomized tile data state. It also reads
851  * before and after that, which will execute the kernel's state copy
852  * functions. So, the tester is advised to double-check any emitted
853  * kernel messages.
854  */
855 static void ptracer_inject_tiledata(pid_t target)
856 {
857 	struct xsave_buffer *xbuf;
858 	struct iovec iov;
859 
860 	xbuf = alloc_xbuf();
861 	if (!xbuf)
862 		fatal_error("unable to allocate XSAVE buffer");
863 
864 	printf("\tRead the init'ed tiledata via ptrace().\n");
865 
866 	iov.iov_base = xbuf;
867 	iov.iov_len = xbuf_size;
868 
869 	memset(stashed_xsave, 0, xbuf_size);
870 
871 	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
872 		fatal_error("PTRACE_GETREGSET");
873 
874 	if (!__compare_tiledata_state(stashed_xsave, xbuf))
875 		printf("[OK]\tThe init'ed tiledata was read from ptracee.\n");
876 	else
877 		printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n");
878 
879 	printf("\tInject tiledata via ptrace().\n");
880 
881 	load_rand_tiledata(xbuf);
882 
883 	memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset],
884 	       &xbuf->bytes[xtiledata.xbuf_offset],
885 	       xtiledata.size);
886 
887 	if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
888 		fatal_error("PTRACE_SETREGSET");
889 
890 	if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
891 		fatal_error("PTRACE_GETREGSET");
892 
893 	if (!__compare_tiledata_state(stashed_xsave, xbuf))
894 		printf("[OK]\tTiledata was correctly written to ptracee.\n");
895 	else
896 		printf("[FAIL]\tTiledata was not correctly written to ptracee.\n");
897 }
898 
/*
 * Ptrace test: fork a ptracee that touches tile data and then stops
 * itself with SIGTRAP, inject and read back tile data from the tracer,
 * then detach and require the ptracee to exit with status 0.
 */
static void test_ptrace(void)
{
	pid_t child;
	int status;

	/* Do not let the child inherit (and re-emit) buffered output. */
	fflush(stdout);
	child = fork();
	if (child < 0) {
		err(1, "fork");
	} else if (!child) {
		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
			err(1, "PTRACE_TRACEME");

		ptracee_firstuse_tiledata();

		raise(SIGTRAP);
		_exit(0);
	}

	/*
	 * Wait until the ptracee stops on its SIGTRAP.  If it terminates
	 * first, stop here: waiting again would fail forever and
	 * WSTOPSIG() is meaningless for a non-stopped status.
	 */
	for (;;) {
		if (waitpid(child, &status, 0) < 0)
			err(1, "waitpid");

		if (WIFEXITED(status) || WIFSIGNALED(status))
			errx(1, "[FAIL]\tptracee terminated before reaching SIGTRAP");

		if (!WIFSTOPPED(status))
			continue;

		if (WSTOPSIG(status) == SIGTRAP)
			break;

		/*
		 * Stopped by some other signal: hand that signal to the
		 * ptracee and let it continue, otherwise it would stay
		 * stopped and this loop would block forever.
		 */
		if (ptrace(PTRACE_CONT, child, NULL,
			   (void *)(uintptr_t)WSTOPSIG(status)))
			err(1, "PTRACE_CONT");
	}

	ptracer_inject_tiledata(child);

	if (ptrace(PTRACE_DETACH, child, NULL, NULL))
		err(1, "PTRACE_DETACH");

	if (waitpid(child, &status, 0) < 0)
		err(1, "waitpid");
	if (!WIFEXITED(status) || WEXITSTATUS(status))
		err(1, "ptrace test");
}
928 
929 int main(void)
930 {
931 	/* Check hardware availability at first */
932 	check_cpuid_xsave();
933 	check_cpuid_xtiledata();
934 
935 	init_stashed_xsave();
936 	sethandler(SIGILL, handle_noperm, 0);
937 
938 	test_dynamic_state();
939 
940 	/* Request permission for the following tests */
941 	req_xtiledata_perm();
942 
943 	test_fork();
944 
945 	ctxtswtest_config.iterations = 10;
946 	ctxtswtest_config.num_threads = 5;
947 	test_context_switch();
948 
949 	test_ptrace();
950 
951 	clearhandler(SIGILL);
952 	free_stashed_xsave();
953 
954 	return 0;
955 }
956