xref: /linux/tools/testing/selftests/mm/protection_keys.c (revision 25356081894df2ebcb140fcd28191e55c2d82a22)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
4  *
5  * There are examples in here of:
6  *  * how to set protection keys on memory
7  *  * how to set/clear bits in pkey registers (the rights register)
8  *  * how to handle SEGV_PKUERR signals and extract pkey-relevant
9  *    information from the siginfo
10  *
11  * Things to add:
12  *	make sure KSM and KSM COW breaking works
13  *	prefault pages in at malloc, or not
14  *	protect MPX bounds tables with protection keys?
15  *	make sure VMA splitting/merging is working correctly
16  *	OOMs can destroy mm->mmap (see exit_mmap()), so make sure it is immune to pkeys
17  *	look for pkey "leaks" where it is still set on a VMA but "freed" back to the kernel
18  *	do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
19  *
20  * Compile like this:
21  *	gcc -mxsave      -o protection_keys    -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
22  *	gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
23  */
24 #define _GNU_SOURCE
25 #define __SANE_USERSPACE_TYPES__
26 #include <errno.h>
27 #include <linux/elf.h>
28 #include <linux/futex.h>
29 #include <time.h>
30 #include <sys/time.h>
31 #include <sys/syscall.h>
32 #include <string.h>
33 #include <stdio.h>
34 #include <stdint.h>
35 #include <stdbool.h>
36 #include <signal.h>
37 #include <assert.h>
38 #include <stdlib.h>
39 #include <ucontext.h>
40 #include <sys/mman.h>
41 #include <sys/types.h>
42 #include <sys/wait.h>
43 #include <sys/stat.h>
44 #include <fcntl.h>
45 #include <unistd.h>
46 #include <sys/ptrace.h>
47 #include <setjmp.h>
48 
49 #include "hugepage_settings.h"
50 #include "pkey-helpers.h"
51 
52 int iteration_nr = 1;
53 int test_nr;
54 
55 u64 shadow_pkey_reg;
56 int dprint_in_signal;
57 
58 noinline int read_ptr(int *ptr)
59 {
60 	/* Keep GCC from optimizing this away somehow */
61 	barrier();
62 	return *ptr;
63 }
64 
65 #if CONTROL_TRACING > 0
/*
 * Write the string 'str' into the existing file 'file' (opened O_RDWR,
 * never created).
 *
 * Any failure is fatal: a diagnostic is printed to stderr and the
 * process exits with the source line number as its exit status.
 */
static void cat_into_file(char *str, char *file)
{
	int fd = open(file, O_RDWR);
	size_t len = strlen(str);
	ssize_t ret;

	dprintf2("%s(): writing '%s' to '%s'\n", __func__, str, file);
	/*
	 * these need to be raw because they are called under
	 * pkey_assert()
	 */
	if (fd < 0) {
		/* fprintf() may clobber errno; keep it for perror() */
		int open_errno = errno;

		/* report the path that failed to open, not the payload */
		fprintf(stderr, "error opening '%s'\n", file);
		errno = open_errno;
		perror("error: ");
		exit(__LINE__);
	}

	ret = write(fd, str, len);
	/* treat both an error and a short write as failure */
	if (ret < 0 || (size_t)ret != len) {
		perror("write to file failed");
		fprintf(stderr, "filename: '%s' str: '%s'\n", file, str);
		exit(__LINE__);
	}
	close(fd);
}
90 
/* Set once the "not root" warning has been printed, so it shows up only once */
static int warned_tracing;

/*
 * Tracing control is only attempted as root.
 *
 * Returns 1 when the effective uid is 0.  Otherwise returns 0 and, on
 * the first such call only, prints a warning to stderr.
 */
static int tracing_root_ok(void)
{
	if (geteuid() == 0)
		return 1;

	if (!warned_tracing)
		fprintf(stderr, "WARNING: not run as root, "
				"can not do tracing control\n");
	warned_tracing = 1;

	return 0;
}
103 #endif
104 
/*
 * Configure the tracer under TRACEDIR for this process and switch
 * tracing on.  The body is compiled only when CONTROL_TRACING > 0,
 * and it returns early when tracing_root_ok() says we are not root.
 */
static void tracing_on(void)
{
#if CONTROL_TRACING > 0
#define TRACEDIR "/sys/kernel/tracing"
	char self_pid[32];

	if (!tracing_root_ok())
		return;

	sprintf(self_pid, "%d", getpid());

	/* tracing is switched off while the settings are changed */
	cat_into_file("0", TRACEDIR "/tracing_on");
	cat_into_file("\n", TRACEDIR "/trace");
	if (1) {
		cat_into_file("function_graph", TRACEDIR "/current_tracer");
		cat_into_file("1", TRACEDIR "/options/funcgraph-proc");
	} else {
		cat_into_file("nop", TRACEDIR "/current_tracer");
	}
	/* limit tracing to our own pid, then switch it back on */
	cat_into_file(self_pid, TRACEDIR "/set_ftrace_pid");
	cat_into_file("1", TRACEDIR "/tracing_on");
	dprintf1("enabled tracing\n");
#endif
}
128 
/*
 * Switch tracing off by writing "0" to the tracing_on control file.
 * The body is compiled only when CONTROL_TRACING > 0, and nothing is
 * written unless tracing_root_ok() reports that we are root.
 */
static void tracing_off(void)
{
#if CONTROL_TRACING > 0
	if (tracing_root_ok())
		cat_into_file("0", "/sys/kernel/tracing/tracing_on");
#endif
}
137 
/*
 * Housekeeping to run on the way to an abort: flush stdout, announce
 * ourselves on stderr, turn tracing off and, when SLEEP_ON_ABORT is
 * defined, sleep for that many seconds.
 */
void abort_hooks(void)
{
	/* push out buffered stdout before anything is written to stderr */
	fflush(stdout);
	fprintf(stderr, "running %s()...\n", __func__);

	tracing_off();

#ifdef SLEEP_ON_ABORT
	sleep(SLEEP_ON_ABORT);
#endif
}
147 
/*
 * This attempts to have roughly a page of instructions followed by a few
 * instructions that do a write, and another page of instructions.  That
 * way, we are pretty sure that the write is in the second page of
 * instructions and has at least a page of padding behind it.
 *
 * *That* lets us be sure to madvise() away the write instruction, which
 * will then fault, which makes sure that the fault code handles
 * execute-only memory properly.
 *
 * @write_to_me: location that receives the store.  The value written is
 *               simply the source line number of the store statement.
 */
#if defined(__powerpc64__) || defined(__aarch64__)
/* This way, both 4K and 64K alignment are maintained */
__attribute__((__aligned__(65536)))
#else
/* Start the function on a page boundary so the padding lines up with pages */
__attribute__((__aligned__(PAGE_SIZE)))
#endif
static void lots_o_noops_around_write(int *write_to_me)
{
	dprintf3("running %s()\n", __func__);
	/* leading padding, intended to fill about one page of instructions */
	__page_o_noops();
	/* Assume this happens in the second page of instructions: */
	*write_to_me = __LINE__;
	/* pad out by another page: */
	__page_o_noops();
	dprintf3("%s() done\n", __func__);
}
174 
175 static void dump_mem(void *dumpme, int len_bytes)
176 {
177 	char *c = (void *)dumpme;
178 	int i;
179 
180 	for (i = 0; i < len_bytes; i += sizeof(u64)) {
181 		u64 *ptr = (u64 *)(c + i);
182 		dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr);
183 	}
184 }
185 
186 static u32 hw_pkey_get(int pkey, unsigned long flags)
187 {
188 	u64 pkey_reg = __read_pkey_reg();
189 
190 	dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
191 			__func__, pkey, flags, 0, 0);
192 	dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg);
193 
194 	return (u32) get_pkey_bits(pkey_reg, pkey);
195 }
196 
197 static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
198 {
199 	u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
200 	u64 old_pkey_reg = __read_pkey_reg();
201 	u64 new_pkey_reg;
202 
203 	/* make sure that 'rights' only contains the bits we expect: */
204 	assert(!(rights & ~mask));
205 
206 	/* modify bits accordingly in old pkey_reg and assign it */
207 	new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights);
208 
209 	__write_pkey_reg(new_pkey_reg);
210 
211 	dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
212 		" pkey_reg now: %016llx old_pkey_reg: %016llx\n",
213 		__func__, pkey, rights, flags, 0, __read_pkey_reg(),
214 		old_pkey_reg);
215 	return 0;
216 }
217 
218 static void pkey_disable_set(int pkey, int flags)
219 {
220 	unsigned long syscall_flags = 0;
221 	int ret;
222 	int pkey_rights;
223 
224 	dprintf1("START->%s(%d, 0x%x)\n", __func__,
225 		pkey, flags);
226 	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
227 
228 	pkey_rights = hw_pkey_get(pkey, syscall_flags);
229 
230 	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
231 			pkey, pkey, pkey_rights);
232 
233 	pkey_assert(pkey_rights >= 0);
234 
235 	pkey_rights |= flags;
236 
237 	ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
238 	assert(!ret);
239 	/* pkey_reg and flags have the same format */
240 	shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
241 	dprintf1("%s(%d) shadow: 0x%016llx\n",
242 		__func__, pkey, shadow_pkey_reg);
243 
244 	pkey_assert(ret >= 0);
245 
246 	pkey_rights = hw_pkey_get(pkey, syscall_flags);
247 	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
248 			pkey, pkey, pkey_rights);
249 
250 	dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
251 		__func__, pkey, read_pkey_reg());
252 	dprintf1("END<---%s(%d, 0x%x)\n", __func__,
253 		pkey, flags);
254 }
255 
256 static void pkey_disable_clear(int pkey, int flags)
257 {
258 	unsigned long syscall_flags = 0;
259 	int ret;
260 	int pkey_rights = hw_pkey_get(pkey, syscall_flags);
261 
262 	pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
263 
264 	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
265 			pkey, pkey, pkey_rights);
266 	pkey_assert(pkey_rights >= 0);
267 
268 	pkey_rights &= ~flags;
269 
270 	ret = hw_pkey_set(pkey, pkey_rights, 0);
271 	shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
272 	pkey_assert(ret >= 0);
273 
274 	pkey_rights = hw_pkey_get(pkey, syscall_flags);
275 	dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
276 			pkey, pkey, pkey_rights);
277 
278 	dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
279 			pkey, read_pkey_reg());
280 }
281 
282 __maybe_unused static void pkey_write_allow(int pkey)
283 {
284 	pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
285 }
286 __maybe_unused static void pkey_write_deny(int pkey)
287 {
288 	pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
289 }
290 __maybe_unused static void pkey_access_allow(int pkey)
291 {
292 	pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
293 }
294 __maybe_unused static void pkey_access_deny(int pkey)
295 {
296 	pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
297 }
298 
/*
 * Map a SIGSEGV si_code value to its symbolic name for log output.
 * Codes other than the four handled here yield "UNKNOWN".
 */
static char *si_code_str(int si_code)
{
	switch (si_code) {
	case SEGV_MAPERR:
		return "SEGV_MAPERR";
	case SEGV_ACCERR:
		return "SEGV_ACCERR";
	case SEGV_BNDERR:
		return "SEGV_BNDERR";
	case SEGV_PKUERR:
		return "SEGV_PKUERR";
	default:
		return "UNKNOWN";
	}
}
311 
/* Number of pkey faults that signal_handler() has processed so far */
static int pkey_faults;
/* pkey taken from the siginfo of the most recent fault; starts out as -1 */
static int last_si_pkey = -1;
/*
 * SA_SIGINFO-style handler that setup_sigsegv_handler() installs for
 * SIGSEGV and SIGALRM.
 *
 * For a pkey fault it records the faulting key from the siginfo in
 * last_si_pkey, bumps pkey_faults, and then lifts the pkey restriction
 * so that the faulting instruction can continue:
 *  - x86:       zeroes the pkey register image found at
 *               pkey_reg_xstate_offset() within the saved FP state
 *  - powerpc64: pkey_access_allow() on the faulting key
 *  - aarch64:   aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL)
 *
 * A si_code of SEGV_MAPERR, SEGV_ACCERR or SEGV_BNDERR is treated as a
 * non-pkey fault and ends the process with exit status 1.
 *
 * @signum:    signal number (not used by the body)
 * @si:        siginfo for the fault
 * @vucontext: ucontext_t * of the interrupted context
 */
static void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
	ucontext_t *uctxt = vucontext;
	int trapno;
	unsigned long ip;
#ifdef MCONTEXT_FPREGS
	char *fpregs;
#endif
#if defined(__i386__) || defined(__x86_64__) /* arch */
	u32 *pkey_reg_ptr;
	int pkey_reg_offset;
#endif /* arch */
	u64 siginfo_pkey;
	u32 *si_pkey_ptr;

	/* flag for the dprintf machinery that we are in signal context */
	dprint_in_signal = 1;
	dprintf1(">>>>===============SIGSEGV============================\n");
	dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
			__func__, __LINE__,
			__read_pkey_reg(), shadow_pkey_reg);

	trapno = MCONTEXT_TRAPNO(uctxt->uc_mcontext);
	ip = MCONTEXT_IP(uctxt->uc_mcontext);
#ifdef MCONTEXT_FPREGS
	fpregs = (char *) uctxt->uc_mcontext.fpregs;
#endif

	dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
			__func__, trapno, ip, si_code_str(si->si_code),
			si->si_code);

#if defined(__i386__) || defined(__x86_64__) /* arch */
#ifdef __i386__
	/*
	 * 32-bit has some extra padding so that userspace can tell whether
	 * the XSTATE header is present in addition to the "legacy" FPU
	 * state.  We just assume that it is here.
	 */
	fpregs += 0x70;
#endif /* i386 */
	/* locate the saved pkey register inside the signal frame's FP state */
	pkey_reg_offset = pkey_reg_xstate_offset();
	pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);

	/*
	 * If we got a PKEY fault, we *HAVE* to have at least one bit set in
	 * here.
	 */
	dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
	if (DEBUG_LEVEL > 4)
		dump_mem(pkey_reg_ptr - 128, 256);
	pkey_assert(*pkey_reg_ptr);
#endif /* arch */

	dprintf1("siginfo: %p\n", si);
#ifdef MCONTEXT_FPREGS
	dprintf1(" fpregs: %p\n", fpregs);
#endif

	/* anything that is not a pkey fault is unexpected: bail out */
	if ((si->si_code == SEGV_MAPERR) ||
	    (si->si_code == SEGV_ACCERR) ||
	    (si->si_code == SEGV_BNDERR)) {
		dprintf0("# non-PK si_code: %d, exiting...\n", si->si_code);
		exit(1);
	}

	/* pull the faulting pkey out of the siginfo and remember it */
	si_pkey_ptr = siginfo_get_pkey_ptr(si);
	dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
	dump_mem((u8 *)si_pkey_ptr - 8, 24);
	siginfo_pkey = *si_pkey_ptr;
	pkey_assert(siginfo_pkey < NR_PKEYS);
	last_si_pkey = siginfo_pkey;

	/*
	 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
	 * checking
	 */
	dprintf1("signal pkey_reg from  pkey_reg: %016llx\n",
			__read_pkey_reg());
	dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey);
#if defined(__i386__) || defined(__x86_64__) /* arch */
	dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
	/* NOTE(review): 64-bit store through a pointer declared as u32 * */
	*(u64 *)pkey_reg_ptr = 0x00000000;
	dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n");
#elif defined(__powerpc64__) /* arch */
	/* restore access and let the faulting instruction continue */
	pkey_access_allow(siginfo_pkey);
#elif defined(__aarch64__)
	aarch64_write_signal_pkey(uctxt, PKEY_REG_ALLOW_ALL);
#endif /* arch */
	pkey_faults++;
	dprintf1("<<<<==================================================\n");
	dprint_in_signal = 0;
}
407 
408 static void sig_chld(int x)
409 {
410 	dprint_in_signal = 1;
411 	dprintf2("[%d] SIGCHLD: %d\n", getpid(), x);
412 	dprint_in_signal = 0;
413 }
414 
415 static void setup_sigsegv_handler(void)
416 {
417 	int r, rs;
418 	struct sigaction newact;
419 	struct sigaction oldact;
420 
421 	/* #PF is mapped to sigsegv */
422 	int signum  = SIGSEGV;
423 
424 	newact.sa_handler = 0;
425 	newact.sa_sigaction = signal_handler;
426 
427 	/*sigset_t - signals to block while in the handler */
428 	/* get the old signal mask. */
429 	rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask);
430 	pkey_assert(rs == 0);
431 
432 	/* call sa_sigaction, not sa_handler*/
433 	newact.sa_flags = SA_SIGINFO;
434 
435 	newact.sa_restorer = 0;  /* void(*)(), obsolete */
436 	r = sigaction(signum, &newact, &oldact);
437 	r = sigaction(SIGALRM, &newact, &oldact);
438 	pkey_assert(r == 0);
439 }
440 
441 static void setup_handlers(void)
442 {
443 	signal(SIGCHLD, &sig_chld);
444 	setup_sigsegv_handler();
445 }
446 
/*
 * Fork a child that does nothing but sleep in an endless loop.
 *
 * Returns the child's pid to the parent; the child never returns.
 * A failed fork() trips pkey_assert().
 */
static pid_t fork_lazy_child(void)
{
	pid_t forkret = fork();

	pkey_assert(forkret >= 0);
	dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);

	if (forkret == 0) {
		/* in the child */
		for (;;) {
			dprintf1("child sleeping...\n");
			sleep(30);
		}
	}

	return forkret;
}
464 
465 static int alloc_pkey(void)
466 {
467 	int ret;
468 	unsigned long init_val = PKEY_UNRESTRICTED;
469 
470 	dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
471 			__func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
472 	ret = sys_pkey_alloc(0, init_val);
473 	/*
474 	 * pkey_alloc() sets PKEY register, so we need to reflect it in
475 	 * shadow_pkey_reg:
476 	 */
477 	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
478 			" shadow: 0x%016llx\n",
479 			__func__, __LINE__, ret, __read_pkey_reg(),
480 			shadow_pkey_reg);
481 	if (ret > 0) {
482 		/* clear both the bits: */
483 		shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
484 						~PKEY_MASK);
485 		dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
486 				" shadow: 0x%016llx\n",
487 				__func__,
488 				__LINE__, ret, __read_pkey_reg(),
489 				shadow_pkey_reg);
490 		/*
491 		 * move the new state in from init_val
492 		 * (remember, we cheated and init_val == pkey_reg format)
493 		 */
494 		shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
495 						init_val);
496 	}
497 	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
498 			" shadow: 0x%016llx\n",
499 			__func__, __LINE__, ret, __read_pkey_reg(),
500 			shadow_pkey_reg);
501 	dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
502 	/* for shadow checking: */
503 	read_pkey_reg();
504 	dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
505 		 " shadow: 0x%016llx\n",
506 		__func__, __LINE__, ret, __read_pkey_reg(),
507 		shadow_pkey_reg);
508 	return ret;
509 }
510 
511 /*
512  * I had a bug where pkey bits could be set by mprotect() but
513  * not cleared.  This ensures we get lots of random bit sets
514  * and clears on the vma and pte pkey bits.
515  */
516 static int alloc_random_pkey(void)
517 {
518 	int max_nr_pkey_allocs;
519 	int ret;
520 	int i;
521 	int alloced_pkeys[NR_PKEYS];
522 	int nr_alloced = 0;
523 	int random_index;
524 	memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
525 
526 	/* allocate every possible key and make a note of which ones we got */
527 	max_nr_pkey_allocs = NR_PKEYS;
528 	for (i = 0; i < max_nr_pkey_allocs; i++) {
529 		int new_pkey = alloc_pkey();
530 		if (new_pkey < 0)
531 			break;
532 		alloced_pkeys[nr_alloced++] = new_pkey;
533 	}
534 
535 	pkey_assert(nr_alloced > 0);
536 	/* select a random one out of the allocated ones */
537 	random_index = rand() % nr_alloced;
538 	ret = alloced_pkeys[random_index];
539 	/* now zero it out so we don't free it next */
540 	alloced_pkeys[random_index] = 0;
541 
542 	/* go through the allocated ones that we did not want and free them */
543 	for (i = 0; i < nr_alloced; i++) {
544 		int free_ret;
545 		if (!alloced_pkeys[i])
546 			continue;
547 		free_ret = sys_pkey_free(alloced_pkeys[i]);
548 		pkey_assert(!free_ret);
549 	}
550 	dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
551 			 " shadow: 0x%016llx\n", __func__,
552 			__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
553 	return ret;
554 }
555 
556 int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
557 		unsigned long pkey)
558 {
559 	int nr_iterations = random() % 100;
560 	int ret;
561 
562 	while (nr_iterations-- >= 0) {
563 		int rpkey = alloc_random_pkey();
564 		ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
565 		dprintf1("sys_mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
566 				ptr, size, orig_prot, pkey, ret);
567 
568 		dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
569 			" shadow: 0x%016llx\n",
570 			__func__, __LINE__, ret, __read_pkey_reg(),
571 			shadow_pkey_reg);
572 		sys_pkey_free(rpkey);
573 		dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
574 			" shadow: 0x%016llx\n",
575 			__func__, __LINE__, ret, __read_pkey_reg(),
576 			shadow_pkey_reg);
577 	}
578 	pkey_assert(pkey < NR_PKEYS);
579 
580 	ret = sys_mprotect_pkey(ptr, size, orig_prot, pkey);
581 	dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
582 			ptr, size, orig_prot, pkey, ret);
583 	pkey_assert(!ret);
584 	dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
585 			" shadow: 0x%016llx\n", __func__,
586 			__LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
587 	return ret;
588 }
589 
/*
 * Bookkeeping for one mapping handed out by the malloc_pkey*() helpers.
 * A slot whose 'ptr' is NULL is free.
 */
struct pkey_malloc_record {
	void *ptr;
	long size;
	int prot;
};
struct pkey_malloc_record *pkey_malloc_records;
struct pkey_malloc_record *pkey_last_malloc_record;
/* number of records currently in use */
static long nr_pkey_malloc_records;
/*
 * Number of slots allocated in pkey_malloc_records[].  Kept apart from
 * the in-use count so that a free_pkey_malloc() never changes how far
 * the array is scanned.
 */
static long nr_pkey_malloc_slots;

/*
 * Remember a new mapping {ptr, size, prot}.
 *
 * A free slot is reused when one exists.  Otherwise the array grows to
 * (2 * slots + 1) entries and the first new slot is used.  The chosen
 * record also becomes pkey_last_malloc_record.
 */
void record_pkey_malloc(void *ptr, long size, int prot)
{
	long i;
	struct pkey_malloc_record *rec = NULL;

	/* find a free record */
	for (i = 0; i < nr_pkey_malloc_slots; i++) {
		if (!pkey_malloc_records[i].ptr) {
			rec = &pkey_malloc_records[i];
			break;
		}
	}
	if (!rec) {
		/* every record is full */
		size_t old_nr_records = nr_pkey_malloc_slots;
		size_t new_nr_records = (old_nr_records * 2 + 1);
		size_t new_size = new_nr_records * sizeof(struct pkey_malloc_record);
		struct pkey_malloc_record *grown;

		dprintf2("new_nr_records: %zd\n", new_nr_records);
		dprintf2("new_size: %zd\n", new_size);
		/* keep the old pointer valid until realloc() has succeeded */
		grown = realloc(pkey_malloc_records, new_size);
		pkey_assert(grown != NULL);
		pkey_malloc_records = grown;
		rec = &pkey_malloc_records[old_nr_records];
		/*
		 * realloc() does not initialize memory, so zero it from
		 * the first new record all the way to the end.
		 */
		memset(rec, 0, (new_nr_records - old_nr_records) * sizeof(*rec));
		nr_pkey_malloc_slots = new_nr_records;
	}
	dprintf3("filling malloc record[%d/%p]: {%p, %ld}\n",
		(int)(rec - pkey_malloc_records), rec, ptr, size);
	rec->ptr = ptr;
	rec->size = size;
	rec->prot = prot;
	pkey_last_malloc_record = rec;
	nr_pkey_malloc_records++;
}

/*
 * munmap() the recorded mapping that contains 'ptr' and release its
 * record.  Trips pkey_assert() when no live record covers 'ptr' or
 * when munmap() fails.
 */
static void free_pkey_malloc(void *ptr)
{
	long i;
	int ret;
	dprintf3("%s(%p)\n", __func__, ptr);
	for (i = 0; i < nr_pkey_malloc_slots; i++) {
		struct pkey_malloc_record *rec = &pkey_malloc_records[i];
		dprintf4("looking for ptr %p at record[%ld/%p]: {%p, %ld}\n",
				ptr, i, rec, rec->ptr, rec->size);
		/* free slots describe no mapping at all */
		if (!rec->ptr)
			continue;
		if (((char *)ptr <  (char *)rec->ptr) ||
		    ((char *)ptr >= (char *)rec->ptr + rec->size))
			continue;

		dprintf3("found ptr %p at record[%ld/%p]: {%p, %ld}\n",
				ptr, i, rec, rec->ptr, rec->size);
		nr_pkey_malloc_records--;
		ret = munmap(rec->ptr, rec->size);
		dprintf3("munmap ret: %d\n", ret);
		pkey_assert(!ret);
		dprintf3("clearing rec->ptr, rec: %p\n", rec);
		rec->ptr = NULL;
		dprintf3("done clearing rec->ptr, rec: %p\n", rec);
		return;
	}
	pkey_assert(false);
}
661 
662 static void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
663 {
664 	void *ptr;
665 	int ret;
666 
667 	read_pkey_reg();
668 	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
669 			size, prot, pkey);
670 	pkey_assert(pkey < NR_PKEYS);
671 	ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
672 	pkey_assert(ptr != (void *)-1);
673 	ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
674 	pkey_assert(!ret);
675 	record_pkey_malloc(ptr, size, prot);
676 	read_pkey_reg();
677 
678 	dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
679 	return ptr;
680 }
681 
682 static void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
683 {
684 	int ret;
685 	void *ptr;
686 
687 	dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
688 			size, prot, pkey);
689 	/*
690 	 * Guarantee we can fit at least one huge page in the resulting
691 	 * allocation by allocating space for 2:
692 	 */
693 	size = ALIGN_UP(size, HPAGE_SIZE * 2);
694 	ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
695 	pkey_assert(ptr != (void *)-1);
696 	record_pkey_malloc(ptr, size, prot);
697 	mprotect_pkey(ptr, size, prot, pkey);
698 
699 	dprintf1("unaligned ptr: %p\n", ptr);
700 	ptr = ALIGN_PTR_UP(ptr, HPAGE_SIZE);
701 	dprintf1("  aligned ptr: %p\n", ptr);
702 	ret = madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE);
703 	dprintf1("MADV_HUGEPAGE ret: %d\n", ret);
704 	ret = madvise(ptr, HPAGE_SIZE, MADV_WILLNEED);
705 	dprintf1("MADV_WILLNEED ret: %d\n", ret);
706 	memset(ptr, 0, HPAGE_SIZE);
707 
708 	dprintf1("mmap()'d thp for pkey %d @ %p\n", pkey, ptr);
709 	return ptr;
710 }
711 
712 static int hugetlb_setup_ok;
713 #define GET_NR_HUGE_PAGES 10
714 static void setup_hugetlbfs(void)
715 {
716 	long hpagesz_mb = HPAGE_SIZE / 1024 / 1024;
717 	unsigned long free_pages;
718 
719 	if (geteuid() != 0) {
720 		ksft_print_msg("WARNING: not run as root, can not do hugetlb test\n");
721 		return;
722 	}
723 
724 	/*
725 	 * Make sure that we got the pages and that they
726 	 * are PMD-level pages. Someone might have made PUD-level
727 	 * pages the default.
728 	 */
729 	hugetlb_save_settings();
730 	hugetlb_set_nr_pages(HPAGE_SIZE, GET_NR_HUGE_PAGES);
731 	free_pages = hugetlb_free_pages(HPAGE_SIZE);
732 	if (free_pages < GET_NR_HUGE_PAGES) {
733 		ksft_print_msg("could not confirm %ldM pages, got: '%lu' expected %d\n",
734 			       hpagesz_mb, free_pages, GET_NR_HUGE_PAGES);
735 		return;
736 	}
737 
738 	hugetlb_setup_ok = 1;
739 }
740 
741 static void *malloc_pkey_hugetlb(long size, int prot, u16 pkey)
742 {
743 	void *ptr;
744 	int flags = MAP_ANONYMOUS|MAP_PRIVATE|MAP_HUGETLB;
745 
746 	if (!hugetlb_setup_ok)
747 		return PTR_ERR_ENOTSUP;
748 
749 	dprintf1("doing %s(%ld, %x, %x)\n", __func__, size, prot, pkey);
750 	size = ALIGN_UP(size, HPAGE_SIZE * 2);
751 	pkey_assert(pkey < NR_PKEYS);
752 	ptr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
753 	pkey_assert(ptr != (void *)-1);
754 	mprotect_pkey(ptr, size, prot, pkey);
755 
756 	record_pkey_malloc(ptr, size, prot);
757 
758 	dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr);
759 	return ptr;
760 }
761 
762 static void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
763 
764 	malloc_pkey_with_mprotect,
765 	malloc_pkey_with_mprotect_subpage,
766 	malloc_pkey_anon_huge,
767 	malloc_pkey_hugetlb
768 };
769 
770 static void *malloc_pkey(long size, int prot, u16 pkey)
771 {
772 	void *ret;
773 	static int malloc_type;
774 	int nr_malloc_types = ARRAY_SIZE(pkey_malloc);
775 
776 	pkey_assert(pkey < NR_PKEYS);
777 
778 	while (1) {
779 		pkey_assert(malloc_type < nr_malloc_types);
780 
781 		ret = pkey_malloc[malloc_type](size, prot, pkey);
782 		pkey_assert(ret != (void *)-1);
783 
784 		malloc_type++;
785 		if (malloc_type >= nr_malloc_types)
786 			malloc_type = (random()%nr_malloc_types);
787 
788 		/* try again if the malloc_type we tried is unsupported */
789 		if (ret == PTR_ERR_ENOTSUP)
790 			continue;
791 
792 		break;
793 	}
794 
795 	dprintf3("%s(%ld, prot=%x, pkey=%x) returning: %p\n", __func__,
796 			size, prot, pkey, ret);
797 	return ret;
798 }
799 
800 static int last_pkey_faults;
801 #define UNKNOWN_PKEY -2
802 void expected_pkey_fault(int pkey)
803 {
804 	dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n",
805 			__func__, last_pkey_faults, pkey_faults);
806 	dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
807 	pkey_assert(last_pkey_faults + 1 == pkey_faults);
808 
809        /*
810 	* For exec-only memory, we do not know the pkey in
811 	* advance, so skip this check.
812 	*/
813 	if (pkey != UNKNOWN_PKEY)
814 		pkey_assert(last_si_pkey == pkey);
815 
816 #if defined(__i386__) || defined(__x86_64__) /* arch */
817 	/*
818 	 * The signal handler shold have cleared out PKEY register to let the
819 	 * test program continue.  We now have to restore it.
820 	 */
821 	if (__read_pkey_reg() != 0)
822 #elif defined(__aarch64__)
823 	if (__read_pkey_reg() != PKEY_REG_ALLOW_ALL)
824 #else
825 	if (__read_pkey_reg() != shadow_pkey_reg)
826 #endif /* arch */
827 		pkey_assert(0);
828 
829 	__write_pkey_reg(shadow_pkey_reg);
830 	dprintf1("%s() set pkey_reg=%016llx to restore state after signal "
831 		       "nuked it\n", __func__, shadow_pkey_reg);
832 	last_pkey_faults = pkey_faults;
833 	last_si_pkey = -1;
834 }
835 
/*
 * Assert that no pkey fault has been counted since last_pkey_faults
 * was last brought up to date.  When the two counters differ, 'msg' is
 * logged first so the failing check can be identified.
 */
#define do_not_expect_pkey_fault(msg)	do {			\
	if (last_pkey_faults != pkey_faults)			\
		dprintf0("# unexpected PKey fault: %s\n", msg);	\
	pkey_assert(last_pkey_faults == pkey_faults);		\
} while (0)
841 
/* File descriptors opened by tests; closed again by close_test_fds() */
static int test_fds[10] = { -1 };
/* Number of entries of test_fds[] that are in use */
static int nr_test_fds;

/*
 * Remember 'fd' so that close_test_fds() closes it later.  Trips
 * pkey_assert() when 'fd' is negative or the table is already full.
 */
static void __save_test_fd(int fd)
{
	pkey_assert(fd >= 0);
	pkey_assert(nr_test_fds < ARRAY_SIZE(test_fds));

	test_fds[nr_test_fds++] = fd;
}
851 
/*
 * Open /etc/passwd read-only as a source of data for read() tests and
 * register the descriptor for later cleanup.  Returns the descriptor.
 */
static int get_test_read_fd(void)
{
	int test_fd;

	test_fd = open("/etc/passwd", O_RDONLY);
	/* asserts that the open worked, and tracks the fd */
	__save_test_fd(test_fd);

	return test_fd;
}
858 
859 static void close_test_fds(void)
860 {
861 	int i;
862 
863 	for (i = 0; i < nr_test_fds; i++) {
864 		if (test_fds[i] < 0)
865 			continue;
866 		close(test_fds[i]);
867 		test_fds[i] = -1;
868 	}
869 	nr_test_fds = 0;
870 }
871 
872 static void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
873 {
874 	int i, err;
875 	int max_nr_pkey_allocs;
876 	int alloced_pkeys[NR_PKEYS];
877 	int nr_alloced = 0;
878 	long size;
879 
880 	pkey_assert(pkey_last_malloc_record);
881 	size = pkey_last_malloc_record->size;
882 	/*
883 	 * This is a bit of a hack.  But mprotect() requires
884 	 * huge-page-aligned sizes when operating on hugetlbfs.
885 	 * So, make sure that we use something that's a multiple
886 	 * of a huge page when we can.
887 	 */
888 	if (size >= HPAGE_SIZE)
889 		size = HPAGE_SIZE;
890 
891 	/* allocate every possible key and make sure key-0 never got allocated */
892 	max_nr_pkey_allocs = NR_PKEYS;
893 	for (i = 0; i < max_nr_pkey_allocs; i++) {
894 		int new_pkey = alloc_pkey();
895 		pkey_assert(new_pkey != 0);
896 
897 		if (new_pkey < 0)
898 			break;
899 		alloced_pkeys[nr_alloced++] = new_pkey;
900 	}
901 	/* free all the allocated keys */
902 	for (i = 0; i < nr_alloced; i++) {
903 		int free_ret;
904 
905 		if (!alloced_pkeys[i])
906 			continue;
907 		free_ret = sys_pkey_free(alloced_pkeys[i]);
908 		pkey_assert(!free_ret);
909 	}
910 
911 	/* attach key-0 in various modes */
912 	err = sys_mprotect_pkey(ptr, size, PROT_READ, 0);
913 	pkey_assert(!err);
914 	err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0);
915 	pkey_assert(!err);
916 	err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0);
917 	pkey_assert(!err);
918 	err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0);
919 	pkey_assert(!err);
920 	err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0);
921 	pkey_assert(!err);
922 }
923 
924 static void test_read_of_write_disabled_region(int *ptr, u16 pkey)
925 {
926 	int ptr_contents;
927 
928 	dprintf1("disabling write access to PKEY[1], doing read\n");
929 	pkey_write_deny(pkey);
930 	ptr_contents = read_ptr(ptr);
931 	dprintf1("*ptr: %d\n", ptr_contents);
932 	dprintf1("\n");
933 }
934 static void test_read_of_access_disabled_region(int *ptr, u16 pkey)
935 {
936 	int ptr_contents;
937 
938 	dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
939 	read_pkey_reg();
940 	pkey_access_deny(pkey);
941 	ptr_contents = read_ptr(ptr);
942 	dprintf1("*ptr: %d\n", ptr_contents);
943 	expected_pkey_fault(pkey);
944 }
945 
946 static void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
947 		u16 pkey)
948 {
949 	int ptr_contents;
950 
951 	dprintf1("disabling access to PKEY[%02d], doing read @ %p\n",
952 				pkey, ptr);
953 	ptr_contents = read_ptr(ptr);
954 	dprintf1("reading ptr before disabling the read : %d\n",
955 			ptr_contents);
956 	read_pkey_reg();
957 	pkey_access_deny(pkey);
958 	ptr_contents = read_ptr(ptr);
959 	dprintf1("*ptr: %d\n", ptr_contents);
960 	expected_pkey_fault(pkey);
961 }
962 
963 static void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
964 		u16 pkey)
965 {
966 	*ptr = __LINE__;
967 	dprintf1("disabling write access; after accessing the page, "
968 		"to PKEY[%02d], doing write\n", pkey);
969 	pkey_write_deny(pkey);
970 	*ptr = __LINE__;
971 	expected_pkey_fault(pkey);
972 }
973 
974 static void test_write_of_write_disabled_region(int *ptr, u16 pkey)
975 {
976 	dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
977 	pkey_write_deny(pkey);
978 	*ptr = __LINE__;
979 	expected_pkey_fault(pkey);
980 }
981 static void test_write_of_access_disabled_region(int *ptr, u16 pkey)
982 {
983 	dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
984 	pkey_access_deny(pkey);
985 	*ptr = __LINE__;
986 	expected_pkey_fault(pkey);
987 }
988 
989 static void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
990 			u16 pkey)
991 {
992 	*ptr = __LINE__;
993 	dprintf1("disabling access; after accessing the page, "
994 		" to PKEY[%02d], doing write\n", pkey);
995 	pkey_access_deny(pkey);
996 	*ptr = __LINE__;
997 	expected_pkey_fault(pkey);
998 }
999 
1000 static void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
1001 {
1002 	int ret;
1003 	int test_fd = get_test_read_fd();
1004 
1005 	dprintf1("disabling access to PKEY[%02d], "
1006 		 "having kernel read() to buffer\n", pkey);
1007 	pkey_access_deny(pkey);
1008 	ret = read(test_fd, ptr, 1);
1009 	dprintf1("read ret: %d\n", ret);
1010 	pkey_assert(ret);
1011 }
1012 
1013 static void test_kernel_write_of_write_disabled_region(int *ptr, u16 pkey)
1014 {
1015 	int ret;
1016 	int test_fd = get_test_read_fd();
1017 
1018 	pkey_write_deny(pkey);
1019 	ret = read(test_fd, ptr, 100);
1020 	dprintf1("read ret: %d\n", ret);
1021 	if (ret < 0 && (DEBUG_LEVEL > 0))
1022 		perror("verbose read result (OK for this to be bad)");
1023 	pkey_assert(ret);
1024 }
1025 
1026 static void test_kernel_gup_of_access_disabled_region(int *ptr, u16 pkey)
1027 {
1028 	int pipe_ret, vmsplice_ret;
1029 	struct iovec iov;
1030 	int pipe_fds[2];
1031 
1032 	pipe_ret = pipe(pipe_fds);
1033 
1034 	pkey_assert(pipe_ret == 0);
1035 	dprintf1("disabling access to PKEY[%02d], "
1036 		 "having kernel vmsplice from buffer\n", pkey);
1037 	pkey_access_deny(pkey);
1038 	iov.iov_base = ptr;
1039 	iov.iov_len = PAGE_SIZE;
1040 	vmsplice_ret = vmsplice(pipe_fds[1], &iov, 1, SPLICE_F_GIFT);
1041 	dprintf1("vmsplice() ret: %d\n", vmsplice_ret);
1042 	pkey_assert(vmsplice_ret == -1);
1043 
1044 	close(pipe_fds[0]);
1045 	close(pipe_fds[1]);
1046 }
1047 
1048 static void test_kernel_gup_write_to_write_disabled_region(int *ptr, u16 pkey)
1049 {
1050 	int ignored = 0xdada;
1051 	int futex_ret;
1052 	int some_int = __LINE__;
1053 
1054 	dprintf1("disabling write to PKEY[%02d], "
1055 		 "doing futex gunk in buffer\n", pkey);
1056 	*ptr = some_int;
1057 	pkey_write_deny(pkey);
1058 	futex_ret = syscall(SYS_futex, ptr, FUTEX_WAIT, some_int-1, NULL,
1059 			&ignored, ignored);
1060 	if (DEBUG_LEVEL > 0)
1061 		perror("futex");
1062 	dprintf1("futex() ret: %d\n", futex_ret);
1063 }
1064 
1065 /* Assumes that all pkeys other than 'pkey' are unallocated */
1066 static void test_pkey_syscalls_on_non_allocated_pkey(int *ptr, u16 pkey)
1067 {
1068 	int err;
1069 	int i;
1070 
1071 	/* Note: 0 is the default pkey, so don't mess with it */
1072 	for (i = 1; i < NR_PKEYS; i++) {
1073 		if (pkey == i)
1074 			continue;
1075 
1076 		dprintf1("trying get/set/free to non-allocated pkey: %2d\n", i);
1077 		err = sys_pkey_free(i);
1078 		pkey_assert(err);
1079 
1080 		err = sys_pkey_free(i);
1081 		pkey_assert(err);
1082 
1083 		err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, i);
1084 		pkey_assert(err);
1085 	}
1086 }
1087 
1088 /* Assumes that all pkeys other than 'pkey' are unallocated */
1089 static void test_pkey_syscalls_bad_args(int *ptr, u16 pkey)
1090 {
1091 	int err;
1092 	int bad_pkey = NR_PKEYS+99;
1093 
1094 	/* pass a known-invalid pkey in: */
1095 	err = sys_mprotect_pkey(ptr, PAGE_SIZE, PROT_READ, bad_pkey);
1096 	pkey_assert(err);
1097 }
1098 
/*
 * fork() and carry on in the child only: the parent terminates
 * immediately with _exit(0), while the child returns to the caller.
 */
static void become_child(void)
{
	pid_t pid = fork();

	pkey_assert(pid >= 0);
	dprintf3("[%d] fork() ret: %d\n", getpid(), pid);

	/* Non-zero return means we are the parent: bow out. */
	if (pid != 0)
		_exit(0);

	/* in the child: just keep running the caller's code */
}
1113 
1114 /* Assumes that all pkeys other than 'pkey' are unallocated */
1115 static void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
1116 {
1117 	int err;
1118 	int allocated_pkeys[NR_PKEYS] = {0};
1119 	int nr_allocated_pkeys = 0;
1120 	int i;
1121 
1122 	for (i = 0; i < NR_PKEYS*3; i++) {
1123 		int new_pkey;
1124 		dprintf1("%s() alloc loop: %d\n", __func__, i);
1125 		new_pkey = alloc_pkey();
1126 		dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx"
1127 				" shadow: 0x%016llx\n",
1128 				__func__, __LINE__, err, __read_pkey_reg(),
1129 				shadow_pkey_reg);
1130 		read_pkey_reg(); /* for shadow checking */
1131 		dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
1132 		if ((new_pkey == -1) && (errno == ENOSPC)) {
1133 			dprintf2("%s() failed to allocate pkey after %d tries\n",
1134 				__func__, nr_allocated_pkeys);
1135 		} else {
1136 			/*
1137 			 * Ensure the number of successes never
1138 			 * exceeds the number of keys supported
1139 			 * in the hardware.
1140 			 */
1141 			pkey_assert(nr_allocated_pkeys < NR_PKEYS);
1142 			allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
1143 		}
1144 
1145 		/*
1146 		 * Make sure that allocation state is properly
1147 		 * preserved across fork().
1148 		 */
1149 		if (i == NR_PKEYS*2)
1150 			become_child();
1151 	}
1152 
1153 	dprintf3("%s()::%d\n", __func__, __LINE__);
1154 
1155 	/*
1156 	 * On x86:
1157 	 * There are 16 pkeys supported in hardware.  Three are
1158 	 * allocated by the time we get here:
1159 	 *   1. The default key (0)
1160 	 *   2. One possibly consumed by an execute-only mapping.
1161 	 *   3. One allocated by the test code and passed in via
1162 	 *      'pkey' to this function.
1163 	 * Ensure that we can allocate at least another 13 (16-3).
1164 	 *
1165 	 * On powerpc:
1166 	 * There are either 5, 28, 29 or 32 pkeys supported in
1167 	 * hardware depending on the page size (4K or 64K) and
1168 	 * platform (powernv or powervm). Four are allocated by
1169 	 * the time we get here. These include pkey-0, pkey-1,
1170 	 * exec-only pkey and the one allocated by the test code.
1171 	 * Ensure that we can allocate the remaining.
1172 	 */
1173 	pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1));
1174 
1175 	for (i = 0; i < nr_allocated_pkeys; i++) {
1176 		err = sys_pkey_free(allocated_pkeys[i]);
1177 		pkey_assert(!err);
1178 		read_pkey_reg(); /* for shadow checking */
1179 	}
1180 }
1181 
1182 static void arch_force_pkey_reg_init(void)
1183 {
1184 #if defined(__i386__) || defined(__x86_64__) /* arch */
1185 	u64 *buf;
1186 
1187 	/*
1188 	 * All keys should be allocated and set to allow reads and
1189 	 * writes, so the register should be all 0.  If not, just
1190 	 * skip the test.
1191 	 */
1192 	if (read_pkey_reg())
1193 		return;
1194 
1195 	/*
1196 	 * Just allocate an absurd about of memory rather than
1197 	 * doing the XSAVE size enumeration dance.
1198 	 */
1199 	buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
1200 
1201 	/* These __builtins require compiling with -mxsave */
1202 
1203 	/* XSAVE to build a valid buffer: */
1204 	__builtin_ia32_xsave(buf, XSTATE_PKEY);
1205 	/* Clear XSTATE_BV[PKRU]: */
1206 	buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY;
1207 	/* XRSTOR will likely get PKRU back to the init state: */
1208 	__builtin_ia32_xrstor(buf, XSTATE_PKEY);
1209 
1210 	munmap(buf, 1*MB);
1211 #endif
1212 }
1213 
1214 
1215 /*
1216  * This is mostly useless on ppc for now.  But it will not
1217  * hurt anything and should give some better coverage as
1218  * a long-running test that continually checks the pkey
1219  * register.
1220  */
1221 static void test_pkey_init_state(int *ptr, u16 pkey)
1222 {
1223 	int err;
1224 	int allocated_pkeys[NR_PKEYS] = {0};
1225 	int nr_allocated_pkeys = 0;
1226 	int i;
1227 
1228 	for (i = 0; i < NR_PKEYS; i++) {
1229 		int new_pkey = alloc_pkey();
1230 
1231 		if (new_pkey < 0)
1232 			continue;
1233 		allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
1234 	}
1235 
1236 	dprintf3("%s()::%d\n", __func__, __LINE__);
1237 
1238 	arch_force_pkey_reg_init();
1239 
1240 	/*
1241 	 * Loop for a bit, hoping to get exercise the kernel
1242 	 * context switch code.
1243 	 */
1244 	for (i = 0; i < 1000000; i++)
1245 		read_pkey_reg();
1246 
1247 	for (i = 0; i < nr_allocated_pkeys; i++) {
1248 		err = sys_pkey_free(allocated_pkeys[i]);
1249 		pkey_assert(!err);
1250 		read_pkey_reg(); /* for shadow checking */
1251 	}
1252 }
1253 
1254 /*
1255  * pkey 0 is special.  It is allocated by default, so you do not
1256  * have to call pkey_alloc() to use it first.  Make sure that it
1257  * is usable.
1258  */
1259 static void test_mprotect_with_pkey_0(int *ptr, u16 pkey)
1260 {
1261 	long size;
1262 	int prot;
1263 
1264 	assert(pkey_last_malloc_record);
1265 	size = pkey_last_malloc_record->size;
1266 	/*
1267 	 * This is a bit of a hack.  But mprotect() requires
1268 	 * huge-page-aligned sizes when operating on hugetlbfs.
1269 	 * So, make sure that we use something that's a multiple
1270 	 * of a huge page when we can.
1271 	 */
1272 	if (size >= HPAGE_SIZE)
1273 		size = HPAGE_SIZE;
1274 	prot = pkey_last_malloc_record->prot;
1275 
1276 	/* Use pkey 0 */
1277 	mprotect_pkey(ptr, size, prot, 0);
1278 
1279 	/* Make sure that we can set it back to the original pkey. */
1280 	mprotect_pkey(ptr, size, prot, pkey);
1281 }
1282 
1283 static void test_ptrace_of_child(int *ptr, u16 pkey)
1284 {
1285 	__always_unused int peek_result;
1286 	pid_t child_pid;
1287 	void *ignored = 0;
1288 	long ret;
1289 	int status;
1290 	/*
1291 	 * This is the "control" for our little expermient.  Make sure
1292 	 * we can always access it when ptracing.
1293 	 */
1294 	int *plain_ptr_unaligned = malloc(HPAGE_SIZE);
1295 	int *plain_ptr = ALIGN_PTR_UP(plain_ptr_unaligned, PAGE_SIZE);
1296 
1297 	/*
1298 	 * Fork a child which is an exact copy of this process, of course.
1299 	 * That means we can do all of our tests via ptrace() and then plain
1300 	 * memory access and ensure they work differently.
1301 	 */
1302 	child_pid = fork_lazy_child();
1303 	dprintf1("[%d] child pid: %d\n", getpid(), child_pid);
1304 
1305 	ret = ptrace(PTRACE_ATTACH, child_pid, ignored, ignored);
1306 	if (ret)
1307 		perror("attach");
1308 	dprintf1("[%d] attach ret: %ld %d\n", getpid(), ret, __LINE__);
1309 	pkey_assert(ret != -1);
1310 	ret = waitpid(child_pid, &status, WUNTRACED);
1311 	if ((ret != child_pid) || !(WIFSTOPPED(status))) {
1312 		fprintf(stderr, "weird waitpid result %ld stat %x\n",
1313 				ret, status);
1314 		pkey_assert(0);
1315 	}
1316 	dprintf2("waitpid ret: %ld\n", ret);
1317 	dprintf2("waitpid status: %d\n", status);
1318 
1319 	pkey_access_deny(pkey);
1320 	pkey_write_deny(pkey);
1321 
1322 	/* Write access, untested for now:
1323 	ret = ptrace(PTRACE_POKEDATA, child_pid, peek_at, data);
1324 	pkey_assert(ret != -1);
1325 	dprintf1("poke at %p: %ld\n", peek_at, ret);
1326 	*/
1327 
1328 	/*
1329 	 * Try to access the pkey-protected "ptr" via ptrace:
1330 	 */
1331 	ret = ptrace(PTRACE_PEEKDATA, child_pid, ptr, ignored);
1332 	/* expect it to work, without an error: */
1333 	pkey_assert(ret != -1);
1334 	/* Now access from the current task, and expect an exception: */
1335 	peek_result = read_ptr(ptr);
1336 	expected_pkey_fault(pkey);
1337 
1338 	/*
1339 	 * Try to access the NON-pkey-protected "plain_ptr" via ptrace:
1340 	 */
1341 	ret = ptrace(PTRACE_PEEKDATA, child_pid, plain_ptr, ignored);
1342 	/* expect it to work, without an error: */
1343 	pkey_assert(ret != -1);
1344 	/* Now access from the current task, and expect NO exception: */
1345 	peek_result = read_ptr(plain_ptr);
1346 	do_not_expect_pkey_fault("read plain pointer after ptrace");
1347 
1348 	ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
1349 	pkey_assert(ret != -1);
1350 
1351 	ret = kill(child_pid, SIGKILL);
1352 	pkey_assert(ret != -1);
1353 
1354 	wait(&status);
1355 
1356 	free(plain_ptr_unaligned);
1357 }
1358 
1359 static void *get_pointer_to_instructions(void)
1360 {
1361 	void *p1;
1362 
1363 	p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE);
1364 	dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write);
1365 	/* lots_o_noops_around_write should be page-aligned already */
1366 	assert(p1 == &lots_o_noops_around_write);
1367 
1368 	/* Point 'p1' at the *second* page of the function: */
1369 	p1 += PAGE_SIZE;
1370 
1371 	/*
1372 	 * Try to ensure we fault this in on next touch to ensure
1373 	 * we get an instruction fault as opposed to a data one
1374 	 */
1375 	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1376 
1377 	return p1;
1378 }
1379 
1380 static void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
1381 {
1382 	void *p1;
1383 	int scratch;
1384 	int ptr_contents;
1385 	int ret;
1386 
1387 	p1 = get_pointer_to_instructions();
1388 	lots_o_noops_around_write(&scratch);
1389 	ptr_contents = read_ptr(p1);
1390 	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
1391 
1392 	ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC, (u64)pkey);
1393 	pkey_assert(!ret);
1394 	pkey_access_deny(pkey);
1395 
1396 	dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
1397 
1398 	/*
1399 	 * Make sure this is an *instruction* fault
1400 	 */
1401 	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1402 	lots_o_noops_around_write(&scratch);
1403 	do_not_expect_pkey_fault("executing on PROT_EXEC memory");
1404 	expect_fault_on_read_execonly_key(p1, pkey);
1405 
1406 	// Reset back to PROT_EXEC | PROT_READ for architectures that support
1407 	// non-PKEY execute-only permissions.
1408 	ret = mprotect_pkey(p1, PAGE_SIZE, PROT_EXEC | PROT_READ, (u64)pkey);
1409 	pkey_assert(!ret);
1410 }
1411 
1412 static void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
1413 {
1414 	void *p1;
1415 	int scratch;
1416 	int ptr_contents;
1417 	int ret;
1418 
1419 	dprintf1("%s() start\n", __func__);
1420 
1421 	p1 = get_pointer_to_instructions();
1422 	lots_o_noops_around_write(&scratch);
1423 	ptr_contents = read_ptr(p1);
1424 	dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
1425 
1426 	/* Use a *normal* mprotect(), not mprotect_pkey(): */
1427 	ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
1428 	pkey_assert(!ret);
1429 
1430 	/*
1431 	 * Reset the shadow, assuming that the above mprotect()
1432 	 * correctly changed PKRU, but to an unknown value since
1433 	 * the actual allocated pkey is unknown.
1434 	 */
1435 	shadow_pkey_reg = __read_pkey_reg();
1436 
1437 	dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
1438 
1439 	/* Make sure this is an *instruction* fault */
1440 	madvise(p1, PAGE_SIZE, MADV_DONTNEED);
1441 	lots_o_noops_around_write(&scratch);
1442 	do_not_expect_pkey_fault("executing on PROT_EXEC memory");
1443 	expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
1444 
1445 	/*
1446 	 * Put the memory back to non-PROT_EXEC.  Should clear the
1447 	 * exec-only pkey off the VMA and allow it to be readable
1448 	 * again.  Go to PROT_NONE first to check for a kernel bug
1449 	 * that did not clear the pkey when doing PROT_NONE.
1450 	 */
1451 	ret = mprotect(p1, PAGE_SIZE, PROT_NONE);
1452 	pkey_assert(!ret);
1453 
1454 	ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
1455 	pkey_assert(!ret);
1456 	ptr_contents = read_ptr(p1);
1457 	do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
1458 }
1459 
1460 #if defined(__i386__) || defined(__x86_64__)
1461 static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
1462 {
1463 	u32 new_pkru;
1464 	pid_t child;
1465 	int status, ret;
1466 	int pkey_offset = pkey_reg_xstate_offset();
1467 	size_t xsave_size = cpu_max_xsave_size();
1468 	void *xsave;
1469 	u32 *pkey_register;
1470 	u64 *xstate_bv;
1471 	struct iovec iov;
1472 
1473 	new_pkru = ~read_pkey_reg();
1474 	/* Don't make PROT_EXEC mappings inaccessible */
1475 	new_pkru &= ~3;
1476 
1477 	child = fork();
1478 	pkey_assert(child >= 0);
1479 	dprintf3("[%d] fork() ret: %d\n", getpid(), child);
1480 	if (!child) {
1481 		ptrace(PTRACE_TRACEME, 0, 0, 0);
1482 		/* Stop and allow the tracer to modify PKRU directly */
1483 		raise(SIGSTOP);
1484 
1485 		/*
1486 		 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
1487 		 * checking
1488 		 */
1489 		if (__read_pkey_reg() != new_pkru)
1490 			_exit(1);
1491 
1492 		/* Stop and allow the tracer to clear XSTATE_BV for PKRU */
1493 		raise(SIGSTOP);
1494 
1495 		if (__read_pkey_reg() != 0)
1496 			_exit(1);
1497 
1498 		/* Stop and allow the tracer to examine PKRU */
1499 		raise(SIGSTOP);
1500 
1501 		_exit(0);
1502 	}
1503 
1504 	pkey_assert(child == waitpid(child, &status, 0));
1505 	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
1506 	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
1507 
1508 	xsave = (void *)malloc(xsave_size);
1509 	pkey_assert(xsave > 0);
1510 
1511 	/* Modify the PKRU register directly */
1512 	iov.iov_base = xsave;
1513 	iov.iov_len = xsave_size;
1514 	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
1515 	pkey_assert(ret == 0);
1516 
1517 	pkey_register = (u32 *)(xsave + pkey_offset);
1518 	pkey_assert(*pkey_register == read_pkey_reg());
1519 
1520 	*pkey_register = new_pkru;
1521 
1522 	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
1523 	pkey_assert(ret == 0);
1524 
1525 	/* Test that the modification is visible in ptrace before any execution */
1526 	memset(xsave, 0xCC, xsave_size);
1527 	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
1528 	pkey_assert(ret == 0);
1529 	pkey_assert(*pkey_register == new_pkru);
1530 
1531 	/* Execute the tracee */
1532 	ret = ptrace(PTRACE_CONT, child, 0, 0);
1533 	pkey_assert(ret == 0);
1534 
1535 	/* Test that the tracee saw the PKRU value change */
1536 	pkey_assert(child == waitpid(child, &status, 0));
1537 	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
1538 	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
1539 
1540 	/* Test that the modification is visible in ptrace after execution */
1541 	memset(xsave, 0xCC, xsave_size);
1542 	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
1543 	pkey_assert(ret == 0);
1544 	pkey_assert(*pkey_register == new_pkru);
1545 
1546 	/* Clear the PKRU bit from XSTATE_BV */
1547 	xstate_bv = (u64 *)(xsave + 512);
1548 	*xstate_bv &= ~(1 << 9);
1549 
1550 	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
1551 	pkey_assert(ret == 0);
1552 
1553 	/* Test that the modification is visible in ptrace before any execution */
1554 	memset(xsave, 0xCC, xsave_size);
1555 	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
1556 	pkey_assert(ret == 0);
1557 	pkey_assert(*pkey_register == 0);
1558 
1559 	ret = ptrace(PTRACE_CONT, child, 0, 0);
1560 	pkey_assert(ret == 0);
1561 
1562 	/* Test that the tracee saw the PKRU value go to 0 */
1563 	pkey_assert(child == waitpid(child, &status, 0));
1564 	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
1565 	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
1566 
1567 	/* Test that the modification is visible in ptrace after execution */
1568 	memset(xsave, 0xCC, xsave_size);
1569 	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
1570 	pkey_assert(ret == 0);
1571 	pkey_assert(*pkey_register == 0);
1572 
1573 	ret = ptrace(PTRACE_CONT, child, 0, 0);
1574 	pkey_assert(ret == 0);
1575 	pkey_assert(child == waitpid(child, &status, 0));
1576 	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
1577 	pkey_assert(WIFEXITED(status));
1578 	pkey_assert(WEXITSTATUS(status) == 0);
1579 	free(xsave);
1580 }
1581 #endif
1582 
#if defined(__aarch64__)
/*
 * Check that a tracer can change a tracee's pkey register through
 * the NT_ARM_POE regset.
 *
 * The child stops itself twice with SIGSTOP.  At the first stop the
 * tracer writes 'new_pkey' into the regset and reads it back; once
 * the child has run and stopped again, the tracer reads it back a
 * second time.  The child exits with 1 if it does not observe
 * 'new_pkey' in its own register, else 0.
 */
static void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
{
	pid_t child;
	int status, ret;
	struct iovec iov;
	u64 trace_pkey;
	/* Just a random pkey value.. */
	u64 new_pkey = (POE_X << PKEY_BITS_PER_PKEY * 2) |
			(POE_NONE << PKEY_BITS_PER_PKEY) |
			POE_RWX;

	child = fork();
	pkey_assert(child >= 0);
	dprintf3("[%d] fork() ret: %d\n", getpid(), child);
	if (!child) {
		ptrace(PTRACE_TRACEME, 0, 0, 0);

		/* Stop and allow the tracer to modify PKRU directly */
		raise(SIGSTOP);

		/*
		 * need __read_pkey_reg() version so we do not do shadow_pkey_reg
		 * checking
		 *
		 * Leave with _exit(), not exit(): this is a forked copy
		 * of the test process, and exit() would flush the stdio
		 * buffers inherited from the parent and run its atexit
		 * handlers a second time.
		 */
		if (__read_pkey_reg() != new_pkey)
			_exit(1);

		raise(SIGSTOP);

		_exit(0);
	}

	pkey_assert(child == waitpid(child, &status, 0));
	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);

	iov.iov_base = &trace_pkey;
	iov.iov_len = sizeof(trace_pkey);
	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);
	/* The child was forked from us, so it must still have our value. */
	pkey_assert(trace_pkey == read_pkey_reg());

	trace_pkey = new_pkey;

	ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);

	/*
	 * Test that the modification is visible in ptrace before any
	 * execution.  The buffer is zeroed first so that stale
	 * contents cannot satisfy the check.
	 */
	memset(&trace_pkey, 0, sizeof(trace_pkey));
	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);
	pkey_assert(trace_pkey == new_pkey);

	/* Execute the tracee */
	ret = ptrace(PTRACE_CONT, child, 0, 0);
	pkey_assert(ret == 0);

	/* Test that the tracee saw the PKRU value change */
	pkey_assert(child == waitpid(child, &status, 0));
	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
	pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);

	/* Test that the modification is visible in ptrace after execution */
	memset(&trace_pkey, 0, sizeof(trace_pkey));
	ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_ARM_POE, &iov);
	pkey_assert(ret == 0);
	pkey_assert(trace_pkey == new_pkey);

	/* Let the child run to completion and check its verdict. */
	ret = ptrace(PTRACE_CONT, child, 0, 0);
	pkey_assert(ret == 0);
	pkey_assert(child == waitpid(child, &status, 0));
	dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
	pkey_assert(WIFEXITED(status));
	pkey_assert(WEXITSTATUS(status) == 0);
}
#endif
1660 
1661 static void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
1662 {
1663 	int size = PAGE_SIZE;
1664 	int sret;
1665 
1666 	if (cpu_has_pkeys()) {
1667 		dprintf1("SKIP: %s: no CPU support\n", __func__);
1668 		return;
1669 	}
1670 
1671 	sret = syscall(__NR_pkey_mprotect, ptr, size, PROT_READ, pkey);
1672 	pkey_assert(sret < 0);
1673 }
1674 
1675 struct pkey_test {
1676 	void (*func)(int *ptr, u16 pkey);
1677 	const char *name;
1678 };
1679 
1680 #define PKEY_TEST(fn) { fn, #fn }
1681 
1682 static struct pkey_test pkey_tests[] = {
1683 	PKEY_TEST(test_read_of_write_disabled_region),
1684 	PKEY_TEST(test_read_of_access_disabled_region),
1685 	PKEY_TEST(test_read_of_access_disabled_region_with_page_already_mapped),
1686 	PKEY_TEST(test_write_of_write_disabled_region),
1687 	PKEY_TEST(test_write_of_write_disabled_region_with_page_already_mapped),
1688 	PKEY_TEST(test_write_of_access_disabled_region),
1689 	PKEY_TEST(test_write_of_access_disabled_region_with_page_already_mapped),
1690 	PKEY_TEST(test_kernel_write_of_access_disabled_region),
1691 	PKEY_TEST(test_kernel_write_of_write_disabled_region),
1692 	PKEY_TEST(test_kernel_gup_of_access_disabled_region),
1693 	PKEY_TEST(test_kernel_gup_write_to_write_disabled_region),
1694 	PKEY_TEST(test_executing_on_unreadable_memory),
1695 	PKEY_TEST(test_implicit_mprotect_exec_only_memory),
1696 	PKEY_TEST(test_mprotect_with_pkey_0),
1697 	PKEY_TEST(test_ptrace_of_child),
1698 	PKEY_TEST(test_pkey_init_state),
1699 	PKEY_TEST(test_pkey_syscalls_on_non_allocated_pkey),
1700 	PKEY_TEST(test_pkey_syscalls_bad_args),
1701 	PKEY_TEST(test_pkey_alloc_exhaust),
1702 	PKEY_TEST(test_pkey_alloc_free_attach_pkey0),
1703 #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
1704 	PKEY_TEST(test_ptrace_modifies_pkru),
1705 #endif
1706 };
1707 
1708 static void run_tests_once(void)
1709 {
1710 	int *ptr;
1711 	int prot = PROT_READ|PROT_WRITE;
1712 
1713 	for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
1714 		int pkey;
1715 		int orig_pkey_faults = pkey_faults;
1716 
1717 		dprintf1("======================\n");
1718 		dprintf1("test %d preparing...\n", test_nr);
1719 
1720 		tracing_on();
1721 		pkey = alloc_random_pkey();
1722 		dprintf1("test %d starting with pkey: %d\n", test_nr, pkey);
1723 		ptr = malloc_pkey(PAGE_SIZE, prot, pkey);
1724 		dprintf1("test %d starting...\n", test_nr);
1725 		pkey_tests[test_nr].func(ptr, pkey);
1726 		dprintf1("freeing test memory: %p\n", ptr);
1727 		free_pkey_malloc(ptr);
1728 		sys_pkey_free(pkey);
1729 
1730 		dprintf1("pkey_faults: %d\n", pkey_faults);
1731 		dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
1732 
1733 		tracing_off();
1734 		close_test_fds();
1735 
1736 		ksft_test_result_pass("test %s (iteration %d)\n", pkey_tests[test_nr].name, iteration_nr);
1737 		dprintf1("======================\n\n");
1738 	}
1739 	iteration_nr++;
1740 }
1741 
1742 static void pkey_setup_shadow(void)
1743 {
1744 	shadow_pkey_reg = __read_pkey_reg();
1745 }
1746 
1747 int main(void)
1748 {
1749 	int nr_iterations = 22;
1750 	int pkeys_supported = is_pkeys_supported();
1751 
1752 	srand((unsigned int)time(NULL));
1753 
1754 	setup_handlers();
1755 
1756 	ksft_print_header();
1757 
1758 	if (!pkeys_supported) {
1759 		int size = PAGE_SIZE;
1760 		int *ptr;
1761 
1762 		ksft_set_plan(1);
1763 		ksft_print_msg("running PKEY tests for unsupported CPU/OS\n");
1764 
1765 		ptr  = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
1766 		assert(ptr != (void *)-1);
1767 		test_mprotect_pkey_on_unsupported_cpu(ptr, 1);
1768 		ksft_test_result_pass("pkey on unsupported CPU/OS\n");
1769 		ksft_finished();
1770 	}
1771 
1772 	ksft_set_plan(ARRAY_SIZE(pkey_tests) * nr_iterations);
1773 
1774 	pkey_setup_shadow();
1775 	ksft_print_msg("startup pkey_reg: %016llx\n", read_pkey_reg());
1776 	setup_hugetlbfs();
1777 
1778 	while (nr_iterations-- > 0)
1779 		run_tests_once();
1780 
1781 	ksft_finished();
1782 }
1783