xref: /linux/tools/testing/selftests/x86/amx.c (revision a997157e42e3119b13c644549a3d8381a1d825d6)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <err.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <setjmp.h>
8 #include <stdio.h>
9 #include <string.h>
10 #include <stdbool.h>
11 #include <unistd.h>
12 #include <x86intrin.h>
13 
14 #include <sys/auxv.h>
15 #include <sys/mman.h>
16 #include <sys/shm.h>
17 #include <sys/syscall.h>
18 #include <sys/wait.h>
19 
20 #ifndef __x86_64__
21 # error This test is 64-bit only
22 #endif
23 
/* Byte offset of the header inside the buffer; it directly follows the legacy region. */
#define XSAVE_HDR_OFFSET	512
/* Size of the header region, in bytes. */
#define XSAVE_HDR_SIZE		64

/*
 * Overlay for a buffer used with the xsave()/xrstor() helpers.
 *
 * The anonymous union allows the same memory to be addressed either by
 * region ('legacy', 'header', 'extended') or as a flat byte array
 * ('bytes') for offset-based access.
 *
 * 'extended' and 'bytes' are zero-length arrays, so sizeof() of this
 * struct only covers the legacy region plus the header.  The storage
 * behind the trailing arrays has to be supplied by whoever allocates
 * the buffer.
 */
struct xsave_buffer {
	union {
		struct {
			char legacy[XSAVE_HDR_OFFSET];
			char header[XSAVE_HDR_SIZE];
			char extended[0];
		};
		char bytes[0];
	};
};
37 
/*
 * Execute XGETBV for the extended control register selected by @index.
 *
 * Returns the 64-bit register value; the instruction delivers it as
 * EDX:EAX, which is reassembled here.
 */
static inline uint64_t xgetbv(uint32_t index)
{
	uint32_t lo, hi;

	__asm__ __volatile__("xgetbv;"
			     : "=a" (lo), "=d" (hi)
			     : "c" (index));

	return ((uint64_t)hi << 32) | lo;
}
47 
/*
 * Execute CPUID.
 *
 * @eax and @ecx are in/out: on entry they select the leaf and sub-leaf,
 * on return they hold the EAX/ECX results.  @ebx and @edx are output only.
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
{
	__asm__ __volatile__("cpuid;"
			     : "+a" (*eax), "=b" (*ebx), "+c" (*ecx), "=d" (*edx));
}
54 
/*
 * Execute XSAVE into @xbuf.
 *
 * @rfbm is the requested-feature bitmap; the instruction takes it
 * split across EDX (high half) and EAX (low half).  The "memory"
 * clobber tells the compiler that the buffer is written by the asm.
 */
static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
{
	const uint32_t mask_hi = (uint32_t)(rfbm >> 32);
	const uint32_t mask_lo = (uint32_t)rfbm;

	__asm__ __volatile__("xsave (%[area])"
			     :
			     : [area] "D" (xbuf), "a" (mask_lo), "d" (mask_hi)
			     : "memory");
}
64 
65 static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
66 {
67 	uint32_t rfbm_lo = rfbm;
68 	uint32_t rfbm_hi = rfbm >> 32;
69 
70 	asm volatile("xrstor (%%rdi)"
71 		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
72 }
73 
/*
 * Report a fatal test failure.
 *
 * The message is prefixed with "[FAIL]\t" and handed to err() with an
 * exit status of 1.  err() exits and will not return.  Per err(3) it
 * also appends the strerror(errno) text to the formatted message.
 */
#define fatal_error(msg, ...)	err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
76 
77 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
78 		       int flags)
79 {
80 	struct sigaction sa;
81 
82 	memset(&sa, 0, sizeof(sa));
83 	sa.sa_sigaction = handler;
84 	sa.sa_flags = SA_SIGINFO | flags;
85 	sigemptyset(&sa.sa_mask);
86 	if (sigaction(sig, &sa, 0))
87 		fatal_error("sigaction");
88 }
89 
90 static void clearhandler(int sig)
91 {
92 	struct sigaction sa;
93 
94 	memset(&sa, 0, sizeof(sa));
95 	sa.sa_handler = SIG_DFL;
96 	sigemptyset(&sa.sa_mask);
97 	if (sigaction(sig, &sa, 0))
98 		fatal_error("sigaction");
99 }
100 
/* State-component numbers of the two AMX tile components ... */
#define XFEATURE_XTILECFG	17
#define XFEATURE_XTILEDATA	18
/* ... and the corresponding single-bit masks, as used in feature bitmaps. */
#define XFEATURE_MASK_XTILECFG	(1 << XFEATURE_XTILECFG)
#define XFEATURE_MASK_XTILEDATA	(1 << XFEATURE_XTILEDATA)
/* Both tile components together. */
#define XFEATURE_MASK_XTILE	(XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
106 
#define CPUID_LEAF1_ECX_XSAVE_MASK	(1 << 26)
#define CPUID_LEAF1_ECX_OSXSAVE_MASK	(1 << 27)

/*
 * Abort the test unless CPUID leaf 1 reports both XSAVE and OSXSAVE
 * in ECX.
 */
static inline void check_cpuid_xsave(void)
{
	uint32_t eax = 1, ebx, ecx = 0, edx;

	cpuid(&eax, &ebx, &ecx, &edx);

	/*
	 * CPUID.1:ECX.XSAVE[bit 26] enumerates general support for
	 * the XSAVE feature set, including XGETBV.
	 */
	if ((ecx & CPUID_LEAF1_ECX_XSAVE_MASK) == 0)
		fatal_error("cpuid: no CPU xsave support");

	if ((ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK) == 0)
		fatal_error("cpuid: no OS xsave support");
}
126 
/*
 * Size in bytes of the XSAVE buffers allocated by alloc_xbuf().
 * Filled in by check_cpuid_xtiledata().
 */
static uint32_t xbuf_size;

/*
 * Location of the XTILEDATA component inside an XSAVE buffer:
 * its byte offset from the start of the buffer and its size in bytes.
 * Filled in by check_cpuid_xtiledata().
 */
static struct {
	uint32_t xbuf_offset;
	uint32_t size;
} xtiledata;
133 
134 #define CPUID_LEAF_XSTATE		0xd
135 #define CPUID_SUBLEAF_XSTATE_USER	0x0
136 #define TILE_CPUID			0x1d
137 #define TILE_PALETTE_ID			0x1
138 
139 static void check_cpuid_xtiledata(void)
140 {
141 	uint32_t eax, ebx, ecx, edx;
142 
143 	eax = CPUID_LEAF_XSTATE;
144 	ecx = CPUID_SUBLEAF_XSTATE_USER;
145 	cpuid(&eax, &ebx, &ecx, &edx);
146 
147 	/*
148 	 * EBX enumerates the size (in bytes) required by the XSAVE
149 	 * instruction for an XSAVE area containing all the user state
150 	 * components corresponding to bits currently set in XCR0.
151 	 *
152 	 * Stash that off so it can be used to allocate buffers later.
153 	 */
154 	xbuf_size = ebx;
155 
156 	eax = CPUID_LEAF_XSTATE;
157 	ecx = XFEATURE_XTILEDATA;
158 
159 	cpuid(&eax, &ebx, &ecx, &edx);
160 	/*
161 	 * eax: XTILEDATA state component size
162 	 * ebx: XTILEDATA state component offset in user buffer
163 	 */
164 	if (!eax || !ebx)
165 		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
166 				eax, ebx);
167 
168 	xtiledata.size	      = eax;
169 	xtiledata.xbuf_offset = ebx;
170 }
171 
172 /* The helpers for managing XSAVE buffer and tile states: */
173 
174 struct xsave_buffer *alloc_xbuf(void)
175 {
176 	struct xsave_buffer *xbuf;
177 
178 	/* XSAVE buffer should be 64B-aligned. */
179 	xbuf = aligned_alloc(64, xbuf_size);
180 	if (!xbuf)
181 		fatal_error("aligned_alloc()");
182 	return xbuf;
183 }
184 
185 static inline void clear_xstate_header(struct xsave_buffer *buffer)
186 {
187 	memset(&buffer->header, 0, sizeof(buffer->header));
188 }
189 
190 static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
191 {
192 	/* XSTATE_BV is at the beginning of the header: */
193 	return *(uint64_t *)&buffer->header;
194 }
195 
196 static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
197 {
198 	/* XSTATE_BV is at the beginning of the header: */
199 	*(uint64_t *)(&buffer->header) = bv;
200 }
201 
202 static void set_rand_tiledata(struct xsave_buffer *xbuf)
203 {
204 	int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
205 	int data;
206 	int i;
207 
208 	/*
209 	 * Ensure that 'data' is never 0.  This ensures that
210 	 * the registers are never in their initial configuration
211 	 * and thus never tracked as being in the init state.
212 	 */
213 	data = rand() | 1;
214 
215 	for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
216 		*ptr = data;
217 }
218 
/* Shared XSAVE buffer used by the permission and fork tests. */
struct xsave_buffer *stashed_xsave;

/* Allocate 'stashed_xsave' and start it out with a cleared header. */
static void init_stashed_xsave(void)
{
	struct xsave_buffer *buf = alloc_xbuf();

	if (buf == NULL)
		fatal_error("failed to allocate stashed_xsave\n");

	stashed_xsave = buf;
	clear_xstate_header(buf);
}
228 
229 static void free_stashed_xsave(void)
230 {
231 	free(stashed_xsave);
232 }
233 
/* See 'struct _fpx_sw_bytes' at sigcontext.h */
#define SW_BYTES_OFFSET		464
/* N.B. The struct's field name varies so read from the offset. */
#define SW_BYTES_BV_OFFSET	(SW_BYTES_OFFSET + 8)

/*
 * Return a pointer to the 'struct _fpx_sw_bytes' located
 * SW_BYTES_OFFSET bytes into @buffer.
 */
static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
{
	char *base = buffer;

	return (struct _fpx_sw_bytes *)&base[SW_BYTES_OFFSET];
}
243 
244 static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
245 {
246 	return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
247 }
248 
/*
 * printf() is not safe to call from a signal handler.  Handlers append
 * their messages to this buffer instead, and regular code prints it.
 */
#define SIGNAL_BUF_LEN 1000
char signal_message_buffer[SIGNAL_BUF_LEN];

/*
 * Append @msg to signal_message_buffer.  Whatever does not fit is
 * silently dropped; the buffer always stays NUL-terminated.
 */
void sig_print(char *msg)
{
	char *dst = signal_message_buffer + strlen(signal_message_buffer);
	char *const last = signal_message_buffer + SIGNAL_BUF_LEN - 1;

	while (dst < last && *msg != '\0')
		*dst++ = *msg++;

	*dst = '\0';
}
258 
/* Set by handle_noperm() when it runs; cleared by xrstor_safe() before each XRSTOR. */
static volatile bool noperm_signaled;
/*
 * Number of failed checks.  Incremented by handle_noperm() and
 * validate_xcomp_perm(); xrstor_safe() aborts the test when it is non-zero.
 */
static int noperm_errs;
261 /*
262  * Signal handler for when AMX is used but
263  * permission has not been obtained.
264  */
265 static void handle_noperm(int sig, siginfo_t *si, void *ctx_void)
266 {
267 	ucontext_t *ctx = (ucontext_t *)ctx_void;
268 	void *xbuf = ctx->uc_mcontext.fpregs;
269 	struct _fpx_sw_bytes *sw_bytes;
270 	uint64_t features;
271 
272 	/* Reset the signal message buffer: */
273 	signal_message_buffer[0] = '\0';
274 	sig_print("\tAt SIGILL handler,\n");
275 
276 	if (si->si_code != ILL_ILLOPC) {
277 		noperm_errs++;
278 		sig_print("[FAIL]\tInvalid signal code.\n");
279 	} else {
280 		sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n");
281 	}
282 
283 	sw_bytes = get_fpx_sw_bytes(xbuf);
284 	/*
285 	 * Without permission, the signal XSAVE buffer should not
286 	 * have room for AMX register state (aka. xtiledata).
287 	 * Check that the size does not overlap with where xtiledata
288 	 * will reside.
289 	 *
290 	 * This also implies that no state components *PAST*
291 	 * XTILEDATA (features >=19) can be present in the buffer.
292 	 */
293 	if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) {
294 		sig_print("[OK]\tValid xstate size\n");
295 	} else {
296 		noperm_errs++;
297 		sig_print("[FAIL]\tInvalid xstate size\n");
298 	}
299 
300 	features = get_fpx_sw_bytes_features(xbuf);
301 	/*
302 	 * Without permission, the XTILEDATA feature
303 	 * bit should not be set.
304 	 */
305 	if ((features & XFEATURE_MASK_XTILEDATA) == 0) {
306 		sig_print("[OK]\tValid xstate mask\n");
307 	} else {
308 		noperm_errs++;
309 		sig_print("[FAIL]\tInvalid xstate mask\n");
310 	}
311 
312 	noperm_signaled = true;
313 	ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */
314 }
315 
316 /* Return true if XRSTOR is successful; otherwise, false. */
317 static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask)
318 {
319 	noperm_signaled = false;
320 	xrstor(xbuf, mask);
321 
322 	/* Print any messages produced by the signal code: */
323 	printf("%s", signal_message_buffer);
324 	/*
325 	 * Reset the buffer to make sure any future printing
326 	 * only outputs new messages:
327 	 */
328 	signal_message_buffer[0] = '\0';
329 
330 	if (noperm_errs)
331 		fatal_error("saw %d errors in noperm signal handler\n", noperm_errs);
332 
333 	return !noperm_signaled;
334 }
335 
336 /*
337  * Use XRSTOR to populate the XTILEDATA registers with
338  * random data.
339  *
340  * Return true if successful; otherwise, false.
341  */
342 static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
343 {
344 	clear_xstate_header(xbuf);
345 	set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
346 	set_rand_tiledata(xbuf);
347 	return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
348 }
349 
350 /* Return XTILEDATA to its initial configuration. */
351 static inline void init_xtiledata(void)
352 {
353 	clear_xstate_header(stashed_xsave);
354 	xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
355 }
356 
/* Tells a validation helper whether the operation under test is supposed to fail or to succeed. */
enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
358 
359 /* arch_prctl() and sigaltstack() test */
360 
361 #define ARCH_GET_XCOMP_PERM	0x1022
362 #define ARCH_REQ_XCOMP_PERM	0x1023
363 
364 static void req_xtiledata_perm(void)
365 {
366 	syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
367 }
368 
369 static void validate_req_xcomp_perm(enum expected_result exp)
370 {
371 	unsigned long bitmask, expected_bitmask;
372 	long rc;
373 
374 	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
375 	if (rc) {
376 		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
377 	} else if (!(bitmask & XFEATURE_MASK_XTILECFG)) {
378 		fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off.");
379 	}
380 
381 	rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
382 	if (exp == FAIL_EXPECTED) {
383 		if (rc) {
384 			printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n");
385 			return;
386 		}
387 
388 		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n");
389 	} else if (rc) {
390 		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
391 	}
392 
393 	expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA;
394 
395 	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
396 	if (rc) {
397 		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
398 	} else if (bitmask != expected_bitmask) {
399 		fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n",
400 			    bitmask, expected_bitmask);
401 	} else {
402 		printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
403 	}
404 }
405 
406 static void validate_xcomp_perm(enum expected_result exp)
407 {
408 	bool load_success = load_rand_tiledata(stashed_xsave);
409 
410 	if (exp == FAIL_EXPECTED) {
411 		if (load_success) {
412 			noperm_errs++;
413 			printf("[FAIL]\tLoad tiledata succeeded.\n");
414 		} else {
415 			printf("[OK]\tLoad tiledata failed.\n");
416 		}
417 	} else if (exp == SUCCESS_EXPECTED) {
418 		if (load_success) {
419 			printf("[OK]\tLoad tiledata succeeded.\n");
420 		} else {
421 			noperm_errs++;
422 			printf("[FAIL]\tLoad tiledata failed.\n");
423 		}
424 	}
425 }
426 
/* Fallback for system headers that do not define this auxiliary-vector key. */
#ifndef AT_MINSIGSTKSZ
#  define AT_MINSIGSTKSZ	51
#endif
430 
431 static void *alloc_altstack(unsigned int size)
432 {
433 	void *altstack;
434 
435 	altstack = mmap(NULL, size, PROT_READ | PROT_WRITE,
436 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
437 
438 	if (altstack == MAP_FAILED)
439 		fatal_error("mmap() for altstack");
440 
441 	return altstack;
442 }
443 
444 static void setup_altstack(void *addr, unsigned long size, enum expected_result exp)
445 {
446 	stack_t ss;
447 	int rc;
448 
449 	memset(&ss, 0, sizeof(ss));
450 	ss.ss_size = size;
451 	ss.ss_sp = addr;
452 
453 	rc = sigaltstack(&ss, NULL);
454 
455 	if (exp == FAIL_EXPECTED) {
456 		if (rc) {
457 			printf("[OK]\tsigaltstack() failed.\n");
458 		} else {
459 			fatal_error("sigaltstack() succeeded unexpectedly.\n");
460 		}
461 	} else if (rc) {
462 		fatal_error("sigaltstack()");
463 	}
464 }
465 
466 static void test_dynamic_sigaltstack(void)
467 {
468 	unsigned int small_size, enough_size;
469 	unsigned long minsigstksz;
470 	void *altstack;
471 
472 	minsigstksz = getauxval(AT_MINSIGSTKSZ);
473 	printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz);
474 	/*
475 	 * getauxval() itself can return 0 for failure or
476 	 * success.  But, in this case, AT_MINSIGSTKSZ
477 	 * will always return a >=0 value if implemented.
478 	 * Just check for 0.
479 	 */
480 	if (minsigstksz == 0) {
481 		printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n");
482 		return;
483 	}
484 
485 	enough_size = minsigstksz * 2;
486 
487 	altstack = alloc_altstack(enough_size);
488 	printf("\tAllocate memory for altstack (%u bytes).\n", enough_size);
489 
490 	/*
491 	 * Try setup_altstack() with a size which can not fit
492 	 * XTILEDATA.  ARCH_REQ_XCOMP_PERM should fail.
493 	 */
494 	small_size = minsigstksz - xtiledata.size;
495 	printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size);
496 	setup_altstack(altstack, small_size, SUCCESS_EXPECTED);
497 	validate_req_xcomp_perm(FAIL_EXPECTED);
498 
499 	/*
500 	 * Try setup_altstack() with a size derived from
501 	 * AT_MINSIGSTKSZ.  It should be more than large enough
502 	 * and thus ARCH_REQ_XCOMP_PERM should succeed.
503 	 */
504 	printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size);
505 	setup_altstack(altstack, enough_size, SUCCESS_EXPECTED);
506 	validate_req_xcomp_perm(SUCCESS_EXPECTED);
507 
508 	/*
509 	 * Try to coerce setup_altstack() to again accept a
510 	 * too-small altstack.  This ensures that big-enough
511 	 * sigaltstacks can not shrink to a too-small value
512 	 * once XTILEDATA permission is established.
513 	 */
514 	printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size);
515 	setup_altstack(altstack, small_size, FAIL_EXPECTED);
516 }
517 
518 static void test_dynamic_state(void)
519 {
520 	pid_t parent, child, grandchild;
521 
522 	parent = fork();
523 	if (parent < 0) {
524 		/* fork() failed */
525 		fatal_error("fork");
526 	} else if (parent > 0) {
527 		int status;
528 		/* fork() succeeded.  Now in the parent. */
529 
530 		wait(&status);
531 		if (!WIFEXITED(status) || WEXITSTATUS(status))
532 			fatal_error("arch_prctl test parent exit");
533 		return;
534 	}
535 	/* fork() succeeded.  Now in the child . */
536 
537 	printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n");
538 
539 	printf("\tFork a child.\n");
540 	child = fork();
541 	if (child < 0) {
542 		fatal_error("fork");
543 	} else if (child > 0) {
544 		int status;
545 
546 		wait(&status);
547 		if (!WIFEXITED(status) || WEXITSTATUS(status))
548 			fatal_error("arch_prctl test child exit");
549 		_exit(0);
550 	}
551 
552 	/*
553 	 * The permission request should fail without an
554 	 * XTILEDATA-compatible signal stack
555 	 */
556 	printf("\tTest XCOMP_PERM at child.\n");
557 	validate_xcomp_perm(FAIL_EXPECTED);
558 
559 	/*
560 	 * Set up an XTILEDATA-compatible signal stack and
561 	 * also obtain permission to populate XTILEDATA.
562 	 */
563 	printf("\tTest dynamic sigaltstack at child:\n");
564 	test_dynamic_sigaltstack();
565 
566 	/* Ensure that XTILEDATA can be populated. */
567 	printf("\tTest XCOMP_PERM again at child.\n");
568 	validate_xcomp_perm(SUCCESS_EXPECTED);
569 
570 	printf("\tFork a grandchild.\n");
571 	grandchild = fork();
572 	if (grandchild < 0) {
573 		/* fork() failed */
574 		fatal_error("fork");
575 	} else if (!grandchild) {
576 		/* fork() succeeded.  Now in the (grand)child. */
577 		printf("\tTest XCOMP_PERM at grandchild.\n");
578 
579 		/*
580 		 * Ensure that the grandchild inherited
581 		 * permission and a compatible sigaltstack:
582 		 */
583 		validate_xcomp_perm(SUCCESS_EXPECTED);
584 	} else {
585 		int status;
586 		/* fork() succeeded.  Now in the parent. */
587 
588 		wait(&status);
589 		if (!WIFEXITED(status) || WEXITSTATUS(status))
590 			fatal_error("fork test grandchild");
591 	}
592 
593 	_exit(0);
594 }
595 
596 /*
597  * Save current register state and compare it to @xbuf1.'
598  *
599  * Returns false if @xbuf1 matches the registers.
600  * Returns true  if @xbuf1 differs from the registers.
601  */
602 static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
603 {
604 	struct xsave_buffer *xbuf2;
605 	int ret;
606 
607 	xbuf2 = alloc_xbuf();
608 	if (!xbuf2)
609 		fatal_error("failed to allocate XSAVE buffer\n");
610 
611 	xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
612 	ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
613 		     &xbuf2->bytes[xtiledata.xbuf_offset],
614 		     xtiledata.size);
615 
616 	free(xbuf2);
617 
618 	if (ret == 0)
619 		return false;
620 	return true;
621 }
622 
/* Fatal unless the XTILEDATA registers still match the data in @xbuf. */
static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
{
	if (__validate_tiledata_regs(xbuf))
		fatal_error("TILEDATA registers changed");
}
630 
/* Fatal unless the XTILEDATA registers differ from the data in @xbuf. */
static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
{
	if (!__validate_tiledata_regs(xbuf))
		fatal_error("TILEDATA registers did not change");
}
638 
639 /* tiledata inheritance test */
640 
641 static void test_fork(void)
642 {
643 	pid_t child, grandchild;
644 
645 	child = fork();
646 	if (child < 0) {
647 		/* fork() failed */
648 		fatal_error("fork");
649 	} else if (child > 0) {
650 		/* fork() succeeded.  Now in the parent. */
651 		int status;
652 
653 		wait(&status);
654 		if (!WIFEXITED(status) || WEXITSTATUS(status))
655 			fatal_error("fork test child");
656 		return;
657 	}
658 	/* fork() succeeded.  Now in the child. */
659 	printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n");
660 
661 	load_rand_tiledata(stashed_xsave);
662 
663 	grandchild = fork();
664 	if (grandchild < 0) {
665 		/* fork() failed */
666 		fatal_error("fork");
667 	} else if (grandchild > 0) {
668 		/* fork() succeeded.  Still in the first child. */
669 		int status;
670 
671 		wait(&status);
672 		if (!WIFEXITED(status) || WEXITSTATUS(status))
673 			fatal_error("fork test grand child");
674 		_exit(0);
675 	}
676 	/* fork() succeeded.  Now in the (grand)child. */
677 
678 	/*
679 	 * TILEDATA registers are not preserved across fork().
680 	 * Ensure that their value has changed:
681 	 */
682 	validate_tiledata_regs_changed(stashed_xsave);
683 
684 	_exit(0);
685 }
686 
687 /* Context switching test */
688 
/*
 * Parameters of the context switch test; main() fills them in
 * before calling test_context_switch().
 */
static struct _ctxtswtest_cfg {
	unsigned int iterations;	/* loop count of every worker thread */
	unsigned int num_threads;	/* number of worker threads to create */
} ctxtswtest_config;
693 
/*
 * Per-thread bookkeeping for the context switch test.  The entries
 * form a ring through 'next' (see create_threads()).
 */
struct futex_info {
	pthread_t thread;		/* handle filled in by pthread_create() */
	int nr;				/* index of this entry in the array */
	pthread_mutex_t mutex;		/* this thread blocks on it until it is unlocked */
	struct futex_info *next;	/* entry whose mutex this thread unlocks */
};
700 
701 static void *check_tiledata(void *info)
702 {
703 	struct futex_info *finfo = (struct futex_info *)info;
704 	struct xsave_buffer *xbuf;
705 	int i;
706 
707 	xbuf = alloc_xbuf();
708 	if (!xbuf)
709 		fatal_error("unable to allocate XSAVE buffer");
710 
711 	/*
712 	 * Load random data into 'xbuf' and then restore
713 	 * it to the tile registers themselves.
714 	 */
715 	load_rand_tiledata(xbuf);
716 	for (i = 0; i < ctxtswtest_config.iterations; i++) {
717 		pthread_mutex_lock(&finfo->mutex);
718 
719 		/*
720 		 * Ensure the register values have not
721 		 * diverged from those recorded in 'xbuf'.
722 		 */
723 		validate_tiledata_regs_same(xbuf);
724 
725 		/* Load new, random values into xbuf and registers */
726 		load_rand_tiledata(xbuf);
727 
728 		/*
729 		 * The last thread's last unlock will be for
730 		 * thread 0's mutex.  However, thread 0 will
731 		 * have already exited the loop and the mutex
732 		 * will already be unlocked.
733 		 *
734 		 * Because this is not an ERRORCHECK mutex,
735 		 * that inconsistency will be silently ignored.
736 		 */
737 		pthread_mutex_unlock(&finfo->next->mutex);
738 	}
739 
740 	free(xbuf);
741 	/*
742 	 * Return this thread's finfo, which is
743 	 * a unique value for this thread.
744 	 */
745 	return finfo;
746 }
747 
748 static int create_threads(int num, struct futex_info *finfo)
749 {
750 	int i;
751 
752 	for (i = 0; i < num; i++) {
753 		int next_nr;
754 
755 		finfo[i].nr = i;
756 		/*
757 		 * Thread 'i' will wait on this mutex to
758 		 * be unlocked.  Lock it immediately after
759 		 * initialization:
760 		 */
761 		pthread_mutex_init(&finfo[i].mutex, NULL);
762 		pthread_mutex_lock(&finfo[i].mutex);
763 
764 		next_nr = (i + 1) % num;
765 		finfo[i].next = &finfo[next_nr];
766 
767 		if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
768 			fatal_error("pthread_create()");
769 	}
770 	return 0;
771 }
772 
/*
 * Restrict the calling thread to CPU 0.
 * A sched_setaffinity() failure is fatal.
 */
static void affinitize_cpu0(void)
{
	cpu_set_t only_cpu0;

	CPU_ZERO(&only_cpu0);
	CPU_SET(0, &only_cpu0);

	if (sched_setaffinity(0, sizeof(only_cpu0), &only_cpu0))
		fatal_error("sched_setaffinity to CPU 0");
}
783 
784 static void test_context_switch(void)
785 {
786 	struct futex_info *finfo;
787 	int i;
788 
789 	/* Affinitize to one CPU to force context switches */
790 	affinitize_cpu0();
791 
792 	req_xtiledata_perm();
793 
794 	printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
795 	       ctxtswtest_config.iterations,
796 	       ctxtswtest_config.num_threads);
797 
798 
799 	finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
800 	if (!finfo)
801 		fatal_error("malloc()");
802 
803 	create_threads(ctxtswtest_config.num_threads, finfo);
804 
805 	/*
806 	 * This thread wakes up thread 0
807 	 * Thread 0 will wake up 1
808 	 * Thread 1 will wake up 2
809 	 * ...
810 	 * the last thread will wake up 0
811 	 *
812 	 * ... this will repeat for the configured
813 	 * number of iterations.
814 	 */
815 	pthread_mutex_unlock(&finfo[0].mutex);
816 
817 	/* Wait for all the threads to finish: */
818 	for (i = 0; i < ctxtswtest_config.num_threads; i++) {
819 		void *thread_retval;
820 		int rc;
821 
822 		rc = pthread_join(finfo[i].thread, &thread_retval);
823 
824 		if (rc)
825 			fatal_error("pthread_join() failed for thread %d err: %d\n",
826 					i, rc);
827 
828 		if (thread_retval != &finfo[i])
829 			fatal_error("unexpected thread retval for thread %d: %p\n",
830 					i, thread_retval);
831 
832 	}
833 
834 	printf("[OK]\tNo incorrect case was found.\n");
835 
836 	free(finfo);
837 }
838 
839 int main(void)
840 {
841 	/* Check hardware availability at first */
842 	check_cpuid_xsave();
843 	check_cpuid_xtiledata();
844 
845 	init_stashed_xsave();
846 	sethandler(SIGILL, handle_noperm, 0);
847 
848 	test_dynamic_state();
849 
850 	/* Request permission for the following tests */
851 	req_xtiledata_perm();
852 
853 	test_fork();
854 
855 	ctxtswtest_config.iterations = 10;
856 	ctxtswtest_config.num_threads = 5;
857 	test_context_switch();
858 
859 	clearhandler(SIGILL);
860 	free_stashed_xsave();
861 
862 	return 0;
863 }
864