xref: /linux/tools/testing/selftests/x86/amx.c (revision 722ecdbce68a87de2d9296f91308f44ea900a039)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <err.h>
5 #include <errno.h>
6 #include <pthread.h>
7 #include <setjmp.h>
8 #include <stdio.h>
9 #include <string.h>
10 #include <stdbool.h>
11 #include <unistd.h>
12 #include <x86intrin.h>
13 
14 #include <sys/auxv.h>
15 #include <sys/mman.h>
16 #include <sys/shm.h>
17 #include <sys/syscall.h>
18 #include <sys/wait.h>
19 
20 #include "../kselftest.h" /* For __cpuid_count() */
21 
22 #ifndef __x86_64__
23 # error This test is 64-bit only
24 #endif
25 
#define XSAVE_HDR_OFFSET	512
#define XSAVE_HDR_SIZE		64

/*
 * Overlay used to address an XSAVE area.
 *
 * The same memory can be viewed either as a flat byte array ('bytes')
 * or through its named regions: the legacy region (XSAVE_HDR_OFFSET
 * bytes), the XSAVE header (XSAVE_HDR_SIZE bytes) and whatever follows
 * the header ('extended').
 *
 * 'bytes' and 'extended' are zero-length markers: the storage behind
 * them comes from the allocation, not from this type.
 */
struct xsave_buffer {
	union {
		char bytes[0];
		struct {
			char legacy[XSAVE_HDR_OFFSET];
			char header[XSAVE_HDR_SIZE];
			char extended[0];
		};
	};
};
39 
/*
 * Execute XGETBV for the extended control register selected by @index
 * (passed in ECX) and return the 64-bit result assembled from EDX:EAX.
 */
static inline uint64_t xgetbv(uint32_t index)
{
	uint32_t lo, hi;

	__asm__ volatile("xgetbv;"
			 : "=a" (lo), "=d" (hi)
			 : "c" (index));

	return ((uint64_t)hi << 32) | lo;
}
49 
/*
 * Execute XSAVE into @xbuf.
 *
 * @rfbm is split into its low and high 32-bit halves, which are handed
 * to the instruction in EAX and EDX.  The "memory" clobber tells the
 * compiler that the instruction writes memory.
 */
static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
{
	uint32_t mask_hi = rfbm >> 32;
	uint32_t mask_lo = (uint32_t)rfbm;

	__asm__ volatile("xsave (%%rdi)"
			 :
			 : "D" (xbuf), "a" (mask_lo), "d" (mask_hi)
			 : "memory");
}
59 
60 static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
61 {
62 	uint32_t rfbm_lo = rfbm;
63 	uint32_t rfbm_hi = rfbm >> 32;
64 
65 	asm volatile("xrstor (%%rdi)"
66 		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
67 }
68 
69 /* err() exits and will not return */
70 #define fatal_error(msg, ...)	err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
71 
72 static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
73 		       int flags)
74 {
75 	struct sigaction sa;
76 
77 	memset(&sa, 0, sizeof(sa));
78 	sa.sa_sigaction = handler;
79 	sa.sa_flags = SA_SIGINFO | flags;
80 	sigemptyset(&sa.sa_mask);
81 	if (sigaction(sig, &sa, 0))
82 		fatal_error("sigaction");
83 }
84 
85 static void clearhandler(int sig)
86 {
87 	struct sigaction sa;
88 
89 	memset(&sa, 0, sizeof(sa));
90 	sa.sa_handler = SIG_DFL;
91 	sigemptyset(&sa.sa_mask);
92 	if (sigaction(sig, &sa, 0))
93 		fatal_error("sigaction");
94 }
95 
/* XSAVE state-component numbers of the AMX tile state and their masks. */
#define XFEATURE_XTILECFG	17
#define XFEATURE_XTILEDATA	18
#define XFEATURE_MASK_XTILECFG	(1 << XFEATURE_XTILECFG)
#define XFEATURE_MASK_XTILEDATA	(1 << XFEATURE_XTILEDATA)
#define XFEATURE_MASK_XTILE	(XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)

#define CPUID_LEAF1_ECX_XSAVE_MASK	(1 << 26)
#define CPUID_LEAF1_ECX_OSXSAVE_MASK	(1 << 27)

/*
 * Bail out unless CPUID leaf 1 reports both XSAVE and OSXSAVE in ECX.
 *
 * CPUID.1:ECX.XSAVE[bit 26] enumerates general support for the XSAVE
 * feature set, including XGETBV.  Bit 27 is the OSXSAVE flag; when it
 * is clear the test reports that the OS lacks xsave support.
 */
static inline void check_cpuid_xsave(void)
{
	uint32_t eax, ebx, ecx, edx;
	bool cpu_has_xsave, os_has_xsave;

	__cpuid_count(1, 0, eax, ebx, ecx, edx);

	cpu_has_xsave = (ecx & CPUID_LEAF1_ECX_XSAVE_MASK) != 0;
	if (!cpu_has_xsave)
		fatal_error("cpuid: no CPU xsave support");

	os_has_xsave = (ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK) != 0;
	if (!os_has_xsave)
		fatal_error("cpuid: no OS xsave support");
}
119 
120 static uint32_t xbuf_size;
121 
122 static struct {
123 	uint32_t xbuf_offset;
124 	uint32_t size;
125 } xtiledata;
126 
127 #define CPUID_LEAF_XSTATE		0xd
128 #define CPUID_SUBLEAF_XSTATE_USER	0x0
129 #define TILE_CPUID			0x1d
130 #define TILE_PALETTE_ID			0x1
131 
132 static void check_cpuid_xtiledata(void)
133 {
134 	uint32_t eax, ebx, ecx, edx;
135 
136 	__cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
137 		      eax, ebx, ecx, edx);
138 
139 	/*
140 	 * EBX enumerates the size (in bytes) required by the XSAVE
141 	 * instruction for an XSAVE area containing all the user state
142 	 * components corresponding to bits currently set in XCR0.
143 	 *
144 	 * Stash that off so it can be used to allocate buffers later.
145 	 */
146 	xbuf_size = ebx;
147 
148 	__cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
149 		      eax, ebx, ecx, edx);
150 	/*
151 	 * eax: XTILEDATA state component size
152 	 * ebx: XTILEDATA state component offset in user buffer
153 	 */
154 	if (!eax || !ebx)
155 		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
156 				eax, ebx);
157 
158 	xtiledata.size	      = eax;
159 	xtiledata.xbuf_offset = ebx;
160 }
161 
162 /* The helpers for managing XSAVE buffer and tile states: */
163 
164 struct xsave_buffer *alloc_xbuf(void)
165 {
166 	struct xsave_buffer *xbuf;
167 
168 	/* XSAVE buffer should be 64B-aligned. */
169 	xbuf = aligned_alloc(64, xbuf_size);
170 	if (!xbuf)
171 		fatal_error("aligned_alloc()");
172 	return xbuf;
173 }
174 
175 static inline void clear_xstate_header(struct xsave_buffer *buffer)
176 {
177 	memset(&buffer->header, 0, sizeof(buffer->header));
178 }
179 
180 static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
181 {
182 	/* XSTATE_BV is at the beginning of the header: */
183 	return *(uint64_t *)&buffer->header;
184 }
185 
186 static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
187 {
188 	/* XSTATE_BV is at the beginning of the header: */
189 	*(uint64_t *)(&buffer->header) = bv;
190 }
191 
192 static void set_rand_tiledata(struct xsave_buffer *xbuf)
193 {
194 	int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
195 	int data;
196 	int i;
197 
198 	/*
199 	 * Ensure that 'data' is never 0.  This ensures that
200 	 * the registers are never in their initial configuration
201 	 * and thus never tracked as being in the init state.
202 	 */
203 	data = rand() | 1;
204 
205 	for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
206 		*ptr = data;
207 }
208 
/* XSAVE buffer shared by the tests in this file. */
struct xsave_buffer *stashed_xsave;

/*
 * Allocate 'stashed_xsave' and clear its XSAVE header.
 * An allocation failure is fatal.
 */
static void init_stashed_xsave(void)
{
	struct xsave_buffer *buf = alloc_xbuf();

	if (buf == NULL)
		fatal_error("failed to allocate stashed_xsave\n");

	stashed_xsave = buf;
	clear_xstate_header(buf);
}
218 
219 static void free_stashed_xsave(void)
220 {
221 	free(stashed_xsave);
222 }
223 
/* See 'struct _fpx_sw_bytes' at sigcontext.h */
#define SW_BYTES_OFFSET		464
/* N.B. The struct's field name varies so read from the offset. */
#define SW_BYTES_BV_OFFSET	(SW_BYTES_OFFSET + 8)

/*
 * Return a pointer to the 'struct _fpx_sw_bytes' that sits
 * SW_BYTES_OFFSET bytes into @buffer.
 */
static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
{
	char *base = buffer;

	return (struct _fpx_sw_bytes *)(base + SW_BYTES_OFFSET);
}

/*
 * Return the 64-bit value stored SW_BYTES_BV_OFFSET bytes into
 * @buffer.  It is read by offset rather than by field name because
 * the field name varies.
 */
static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
{
	const char *field = (const char *)buffer + SW_BYTES_BV_OFFSET;
	uint64_t features;

	memcpy(&features, field, sizeof(features));
	return features;
}
238 
/* Work around printf() being unsafe in signals: */
#define SIGNAL_BUF_LEN 1000
char signal_message_buffer[SIGNAL_BUF_LEN];

/*
 * Append @msg to 'signal_message_buffer'.
 *
 * The text is truncated so that the buffer, including its terminating
 * NUL, never grows beyond SIGNAL_BUF_LEN bytes.
 */
void sig_print(char *msg)
{
	char *tail = signal_message_buffer + strlen(signal_message_buffer);
	size_t room = &signal_message_buffer[SIGNAL_BUF_LEN - 1] - tail;

	strncat(tail, msg, room);
}
248 
249 static volatile bool noperm_signaled;
250 static int noperm_errs;
251 /*
252  * Signal handler for when AMX is used but
253  * permission has not been obtained.
254  */
255 static void handle_noperm(int sig, siginfo_t *si, void *ctx_void)
256 {
257 	ucontext_t *ctx = (ucontext_t *)ctx_void;
258 	void *xbuf = ctx->uc_mcontext.fpregs;
259 	struct _fpx_sw_bytes *sw_bytes;
260 	uint64_t features;
261 
262 	/* Reset the signal message buffer: */
263 	signal_message_buffer[0] = '\0';
264 	sig_print("\tAt SIGILL handler,\n");
265 
266 	if (si->si_code != ILL_ILLOPC) {
267 		noperm_errs++;
268 		sig_print("[FAIL]\tInvalid signal code.\n");
269 	} else {
270 		sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n");
271 	}
272 
273 	sw_bytes = get_fpx_sw_bytes(xbuf);
274 	/*
275 	 * Without permission, the signal XSAVE buffer should not
276 	 * have room for AMX register state (aka. xtiledata).
277 	 * Check that the size does not overlap with where xtiledata
278 	 * will reside.
279 	 *
280 	 * This also implies that no state components *PAST*
281 	 * XTILEDATA (features >=19) can be present in the buffer.
282 	 */
283 	if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) {
284 		sig_print("[OK]\tValid xstate size\n");
285 	} else {
286 		noperm_errs++;
287 		sig_print("[FAIL]\tInvalid xstate size\n");
288 	}
289 
290 	features = get_fpx_sw_bytes_features(xbuf);
291 	/*
292 	 * Without permission, the XTILEDATA feature
293 	 * bit should not be set.
294 	 */
295 	if ((features & XFEATURE_MASK_XTILEDATA) == 0) {
296 		sig_print("[OK]\tValid xstate mask\n");
297 	} else {
298 		noperm_errs++;
299 		sig_print("[FAIL]\tInvalid xstate mask\n");
300 	}
301 
302 	noperm_signaled = true;
303 	ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */
304 }
305 
306 /* Return true if XRSTOR is successful; otherwise, false. */
307 static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask)
308 {
309 	noperm_signaled = false;
310 	xrstor(xbuf, mask);
311 
312 	/* Print any messages produced by the signal code: */
313 	printf("%s", signal_message_buffer);
314 	/*
315 	 * Reset the buffer to make sure any future printing
316 	 * only outputs new messages:
317 	 */
318 	signal_message_buffer[0] = '\0';
319 
320 	if (noperm_errs)
321 		fatal_error("saw %d errors in noperm signal handler\n", noperm_errs);
322 
323 	return !noperm_signaled;
324 }
325 
326 /*
327  * Use XRSTOR to populate the XTILEDATA registers with
328  * random data.
329  *
330  * Return true if successful; otherwise, false.
331  */
332 static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
333 {
334 	clear_xstate_header(xbuf);
335 	set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
336 	set_rand_tiledata(xbuf);
337 	return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
338 }
339 
340 /* Return XTILEDATA to its initial configuration. */
341 static inline void init_xtiledata(void)
342 {
343 	clear_xstate_header(stashed_xsave);
344 	xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
345 }
346 
347 enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
348 
349 /* arch_prctl() and sigaltstack() test */
350 
351 #define ARCH_GET_XCOMP_PERM	0x1022
352 #define ARCH_REQ_XCOMP_PERM	0x1023
353 
354 static void req_xtiledata_perm(void)
355 {
356 	syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
357 }
358 
359 static void validate_req_xcomp_perm(enum expected_result exp)
360 {
361 	unsigned long bitmask, expected_bitmask;
362 	long rc;
363 
364 	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
365 	if (rc) {
366 		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
367 	} else if (!(bitmask & XFEATURE_MASK_XTILECFG)) {
368 		fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off.");
369 	}
370 
371 	rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
372 	if (exp == FAIL_EXPECTED) {
373 		if (rc) {
374 			printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n");
375 			return;
376 		}
377 
378 		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n");
379 	} else if (rc) {
380 		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
381 	}
382 
383 	expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA;
384 
385 	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
386 	if (rc) {
387 		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
388 	} else if (bitmask != expected_bitmask) {
389 		fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n",
390 			    bitmask, expected_bitmask);
391 	} else {
392 		printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
393 	}
394 }
395 
396 static void validate_xcomp_perm(enum expected_result exp)
397 {
398 	bool load_success = load_rand_tiledata(stashed_xsave);
399 
400 	if (exp == FAIL_EXPECTED) {
401 		if (load_success) {
402 			noperm_errs++;
403 			printf("[FAIL]\tLoad tiledata succeeded.\n");
404 		} else {
405 			printf("[OK]\tLoad tiledata failed.\n");
406 		}
407 	} else if (exp == SUCCESS_EXPECTED) {
408 		if (load_success) {
409 			printf("[OK]\tLoad tiledata succeeded.\n");
410 		} else {
411 			noperm_errs++;
412 			printf("[FAIL]\tLoad tiledata failed.\n");
413 		}
414 	}
415 }
416 
417 #ifndef AT_MINSIGSTKSZ
418 #  define AT_MINSIGSTKSZ	51
419 #endif
420 
421 static void *alloc_altstack(unsigned int size)
422 {
423 	void *altstack;
424 
425 	altstack = mmap(NULL, size, PROT_READ | PROT_WRITE,
426 			MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
427 
428 	if (altstack == MAP_FAILED)
429 		fatal_error("mmap() for altstack");
430 
431 	return altstack;
432 }
433 
434 static void setup_altstack(void *addr, unsigned long size, enum expected_result exp)
435 {
436 	stack_t ss;
437 	int rc;
438 
439 	memset(&ss, 0, sizeof(ss));
440 	ss.ss_size = size;
441 	ss.ss_sp = addr;
442 
443 	rc = sigaltstack(&ss, NULL);
444 
445 	if (exp == FAIL_EXPECTED) {
446 		if (rc) {
447 			printf("[OK]\tsigaltstack() failed.\n");
448 		} else {
449 			fatal_error("sigaltstack() succeeded unexpectedly.\n");
450 		}
451 	} else if (rc) {
452 		fatal_error("sigaltstack()");
453 	}
454 }
455 
456 static void test_dynamic_sigaltstack(void)
457 {
458 	unsigned int small_size, enough_size;
459 	unsigned long minsigstksz;
460 	void *altstack;
461 
462 	minsigstksz = getauxval(AT_MINSIGSTKSZ);
463 	printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz);
464 	/*
465 	 * getauxval() itself can return 0 for failure or
466 	 * success.  But, in this case, AT_MINSIGSTKSZ
467 	 * will always return a >=0 value if implemented.
468 	 * Just check for 0.
469 	 */
470 	if (minsigstksz == 0) {
471 		printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n");
472 		return;
473 	}
474 
475 	enough_size = minsigstksz * 2;
476 
477 	altstack = alloc_altstack(enough_size);
478 	printf("\tAllocate memory for altstack (%u bytes).\n", enough_size);
479 
480 	/*
481 	 * Try setup_altstack() with a size which can not fit
482 	 * XTILEDATA.  ARCH_REQ_XCOMP_PERM should fail.
483 	 */
484 	small_size = minsigstksz - xtiledata.size;
485 	printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size);
486 	setup_altstack(altstack, small_size, SUCCESS_EXPECTED);
487 	validate_req_xcomp_perm(FAIL_EXPECTED);
488 
489 	/*
490 	 * Try setup_altstack() with a size derived from
491 	 * AT_MINSIGSTKSZ.  It should be more than large enough
492 	 * and thus ARCH_REQ_XCOMP_PERM should succeed.
493 	 */
494 	printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size);
495 	setup_altstack(altstack, enough_size, SUCCESS_EXPECTED);
496 	validate_req_xcomp_perm(SUCCESS_EXPECTED);
497 
498 	/*
499 	 * Try to coerce setup_altstack() to again accept a
500 	 * too-small altstack.  This ensures that big-enough
501 	 * sigaltstacks can not shrink to a too-small value
502 	 * once XTILEDATA permission is established.
503 	 */
504 	printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size);
505 	setup_altstack(altstack, small_size, FAIL_EXPECTED);
506 }
507 
508 static void test_dynamic_state(void)
509 {
510 	pid_t parent, child, grandchild;
511 
512 	parent = fork();
513 	if (parent < 0) {
514 		/* fork() failed */
515 		fatal_error("fork");
516 	} else if (parent > 0) {
517 		int status;
518 		/* fork() succeeded.  Now in the parent. */
519 
520 		wait(&status);
521 		if (!WIFEXITED(status) || WEXITSTATUS(status))
522 			fatal_error("arch_prctl test parent exit");
523 		return;
524 	}
525 	/* fork() succeeded.  Now in the child . */
526 
527 	printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n");
528 
529 	printf("\tFork a child.\n");
530 	child = fork();
531 	if (child < 0) {
532 		fatal_error("fork");
533 	} else if (child > 0) {
534 		int status;
535 
536 		wait(&status);
537 		if (!WIFEXITED(status) || WEXITSTATUS(status))
538 			fatal_error("arch_prctl test child exit");
539 		_exit(0);
540 	}
541 
542 	/*
543 	 * The permission request should fail without an
544 	 * XTILEDATA-compatible signal stack
545 	 */
546 	printf("\tTest XCOMP_PERM at child.\n");
547 	validate_xcomp_perm(FAIL_EXPECTED);
548 
549 	/*
550 	 * Set up an XTILEDATA-compatible signal stack and
551 	 * also obtain permission to populate XTILEDATA.
552 	 */
553 	printf("\tTest dynamic sigaltstack at child:\n");
554 	test_dynamic_sigaltstack();
555 
556 	/* Ensure that XTILEDATA can be populated. */
557 	printf("\tTest XCOMP_PERM again at child.\n");
558 	validate_xcomp_perm(SUCCESS_EXPECTED);
559 
560 	printf("\tFork a grandchild.\n");
561 	grandchild = fork();
562 	if (grandchild < 0) {
563 		/* fork() failed */
564 		fatal_error("fork");
565 	} else if (!grandchild) {
566 		/* fork() succeeded.  Now in the (grand)child. */
567 		printf("\tTest XCOMP_PERM at grandchild.\n");
568 
569 		/*
570 		 * Ensure that the grandchild inherited
571 		 * permission and a compatible sigaltstack:
572 		 */
573 		validate_xcomp_perm(SUCCESS_EXPECTED);
574 	} else {
575 		int status;
576 		/* fork() succeeded.  Now in the parent. */
577 
578 		wait(&status);
579 		if (!WIFEXITED(status) || WEXITSTATUS(status))
580 			fatal_error("fork test grandchild");
581 	}
582 
583 	_exit(0);
584 }
585 
586 /*
587  * Save current register state and compare it to @xbuf1.'
588  *
589  * Returns false if @xbuf1 matches the registers.
590  * Returns true  if @xbuf1 differs from the registers.
591  */
592 static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
593 {
594 	struct xsave_buffer *xbuf2;
595 	int ret;
596 
597 	xbuf2 = alloc_xbuf();
598 	if (!xbuf2)
599 		fatal_error("failed to allocate XSAVE buffer\n");
600 
601 	xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
602 	ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
603 		     &xbuf2->bytes[xtiledata.xbuf_offset],
604 		     xtiledata.size);
605 
606 	free(xbuf2);
607 
608 	if (ret == 0)
609 		return false;
610 	return true;
611 }
612 
/*
 * Fatal error unless the XTILEDATA registers still hold the contents
 * recorded in @xbuf.
 */
static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
{
	if (__validate_tiledata_regs(xbuf))
		fatal_error("TILEDATA registers changed");
}
620 
/*
 * Fatal error unless the XTILEDATA registers differ from the contents
 * recorded in @xbuf.
 */
static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
{
	if (!__validate_tiledata_regs(xbuf))
		fatal_error("TILEDATA registers did not change");
}
628 
629 /* tiledata inheritance test */
630 
631 static void test_fork(void)
632 {
633 	pid_t child, grandchild;
634 
635 	child = fork();
636 	if (child < 0) {
637 		/* fork() failed */
638 		fatal_error("fork");
639 	} else if (child > 0) {
640 		/* fork() succeeded.  Now in the parent. */
641 		int status;
642 
643 		wait(&status);
644 		if (!WIFEXITED(status) || WEXITSTATUS(status))
645 			fatal_error("fork test child");
646 		return;
647 	}
648 	/* fork() succeeded.  Now in the child. */
649 	printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n");
650 
651 	load_rand_tiledata(stashed_xsave);
652 
653 	grandchild = fork();
654 	if (grandchild < 0) {
655 		/* fork() failed */
656 		fatal_error("fork");
657 	} else if (grandchild > 0) {
658 		/* fork() succeeded.  Still in the first child. */
659 		int status;
660 
661 		wait(&status);
662 		if (!WIFEXITED(status) || WEXITSTATUS(status))
663 			fatal_error("fork test grand child");
664 		_exit(0);
665 	}
666 	/* fork() succeeded.  Now in the (grand)child. */
667 
668 	/*
669 	 * TILEDATA registers are not preserved across fork().
670 	 * Ensure that their value has changed:
671 	 */
672 	validate_tiledata_regs_changed(stashed_xsave);
673 
674 	_exit(0);
675 }
676 
677 /* Context switching test */
678 
/* Parameters of the context-switch test. */
struct _ctxtswtest_cfg {
	unsigned int iterations;
	unsigned int num_threads;
};

static struct _ctxtswtest_cfg ctxtswtest_config;

/*
 * Per-thread bookkeeping for the context-switch test.  The entries
 * are linked into a ring through 'next'.
 */
struct futex_info {
	pthread_t thread;		/* handle filled in by pthread_create() */
	int nr;				/* index of this entry */
	pthread_mutex_t mutex;		/* the thread blocks on this mutex */
	struct futex_info *next;	/* entry whose mutex gets unlocked next */
};
690 
691 static void *check_tiledata(void *info)
692 {
693 	struct futex_info *finfo = (struct futex_info *)info;
694 	struct xsave_buffer *xbuf;
695 	int i;
696 
697 	xbuf = alloc_xbuf();
698 	if (!xbuf)
699 		fatal_error("unable to allocate XSAVE buffer");
700 
701 	/*
702 	 * Load random data into 'xbuf' and then restore
703 	 * it to the tile registers themselves.
704 	 */
705 	load_rand_tiledata(xbuf);
706 	for (i = 0; i < ctxtswtest_config.iterations; i++) {
707 		pthread_mutex_lock(&finfo->mutex);
708 
709 		/*
710 		 * Ensure the register values have not
711 		 * diverged from those recorded in 'xbuf'.
712 		 */
713 		validate_tiledata_regs_same(xbuf);
714 
715 		/* Load new, random values into xbuf and registers */
716 		load_rand_tiledata(xbuf);
717 
718 		/*
719 		 * The last thread's last unlock will be for
720 		 * thread 0's mutex.  However, thread 0 will
721 		 * have already exited the loop and the mutex
722 		 * will already be unlocked.
723 		 *
724 		 * Because this is not an ERRORCHECK mutex,
725 		 * that inconsistency will be silently ignored.
726 		 */
727 		pthread_mutex_unlock(&finfo->next->mutex);
728 	}
729 
730 	free(xbuf);
731 	/*
732 	 * Return this thread's finfo, which is
733 	 * a unique value for this thread.
734 	 */
735 	return finfo;
736 }
737 
738 static int create_threads(int num, struct futex_info *finfo)
739 {
740 	int i;
741 
742 	for (i = 0; i < num; i++) {
743 		int next_nr;
744 
745 		finfo[i].nr = i;
746 		/*
747 		 * Thread 'i' will wait on this mutex to
748 		 * be unlocked.  Lock it immediately after
749 		 * initialization:
750 		 */
751 		pthread_mutex_init(&finfo[i].mutex, NULL);
752 		pthread_mutex_lock(&finfo[i].mutex);
753 
754 		next_nr = (i + 1) % num;
755 		finfo[i].next = &finfo[next_nr];
756 
757 		if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
758 			fatal_error("pthread_create()");
759 	}
760 	return 0;
761 }
762 
/*
 * Restrict the CPU affinity to CPU 0 only, via sched_setaffinity()
 * called with a pid argument of 0.  Failure is fatal.
 */
static void affinitize_cpu0(void)
{
	cpu_set_t only_cpu0;

	CPU_ZERO(&only_cpu0);
	CPU_SET(0, &only_cpu0);

	if (sched_setaffinity(0, sizeof(only_cpu0), &only_cpu0))
		fatal_error("sched_setaffinity to CPU 0");
}
773 
774 static void test_context_switch(void)
775 {
776 	struct futex_info *finfo;
777 	int i;
778 
779 	/* Affinitize to one CPU to force context switches */
780 	affinitize_cpu0();
781 
782 	req_xtiledata_perm();
783 
784 	printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
785 	       ctxtswtest_config.iterations,
786 	       ctxtswtest_config.num_threads);
787 
788 
789 	finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
790 	if (!finfo)
791 		fatal_error("malloc()");
792 
793 	create_threads(ctxtswtest_config.num_threads, finfo);
794 
795 	/*
796 	 * This thread wakes up thread 0
797 	 * Thread 0 will wake up 1
798 	 * Thread 1 will wake up 2
799 	 * ...
800 	 * the last thread will wake up 0
801 	 *
802 	 * ... this will repeat for the configured
803 	 * number of iterations.
804 	 */
805 	pthread_mutex_unlock(&finfo[0].mutex);
806 
807 	/* Wait for all the threads to finish: */
808 	for (i = 0; i < ctxtswtest_config.num_threads; i++) {
809 		void *thread_retval;
810 		int rc;
811 
812 		rc = pthread_join(finfo[i].thread, &thread_retval);
813 
814 		if (rc)
815 			fatal_error("pthread_join() failed for thread %d err: %d\n",
816 					i, rc);
817 
818 		if (thread_retval != &finfo[i])
819 			fatal_error("unexpected thread retval for thread %d: %p\n",
820 					i, thread_retval);
821 
822 	}
823 
824 	printf("[OK]\tNo incorrect case was found.\n");
825 
826 	free(finfo);
827 }
828 
829 int main(void)
830 {
831 	/* Check hardware availability at first */
832 	check_cpuid_xsave();
833 	check_cpuid_xtiledata();
834 
835 	init_stashed_xsave();
836 	sethandler(SIGILL, handle_noperm, 0);
837 
838 	test_dynamic_state();
839 
840 	/* Request permission for the following tests */
841 	req_xtiledata_perm();
842 
843 	test_fork();
844 
845 	ctxtswtest_config.iterations = 10;
846 	ctxtswtest_config.num_threads = 5;
847 	test_context_switch();
848 
849 	clearhandler(SIGILL);
850 	free_stashed_xsave();
851 
852 	return 0;
853 }
854