xref: /linux/tools/testing/selftests/x86/lam.c (revision ba6ec09911b805778a2fed6d626bfe77b011a717)
1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <sys/syscall.h>
7 #include <sys/ioctl.h>
8 #include <time.h>
9 #include <signal.h>
10 #include <setjmp.h>
11 #include <sys/mman.h>
12 #include <sys/utsname.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <fcntl.h>
16 #include <inttypes.h>
17 #include <sched.h>
18 
19 #include <sys/uio.h>
20 #include <linux/io_uring.h>
21 #include "../kselftest.h"
22 
23 #ifndef __x86_64__
24 # error This test is 64-bit only
25 #endif
26 
27 /* LAM modes, these definitions were copied from kernel code */
28 #define LAM_NONE                0
29 #define LAM_U57_BITS            6
30 
31 #define LAM_U57_MASK            (0x3fULL << 57)
32 /* arch prctl for LAM */
33 #define ARCH_GET_UNTAG_MASK     0x4001
34 #define ARCH_ENABLE_TAGGED_ADDR 0x4002
35 #define ARCH_GET_MAX_TAG_BITS   0x4003
36 #define ARCH_FORCE_TAGGED_SVA	0x4004
37 
38 /* Specified test function bits */
39 #define FUNC_MALLOC             0x1
40 #define FUNC_BITS               0x2
41 #define FUNC_MMAP               0x4
42 #define FUNC_SYSCALL            0x8
43 #define FUNC_URING              0x10
44 #define FUNC_INHERITE           0x20
45 #define FUNC_PASID              0x40
46 
47 /* get_user() pointer test cases */
48 #define GET_USER_USER           0
49 #define GET_USER_KERNEL_TOP     1
50 #define GET_USER_KERNEL_BOT     2
51 #define GET_USER_KERNEL         3
52 
53 #define TEST_MASK               0x7f
54 #define L5_SIGN_EXT_MASK        (0xFFUL << 56)
55 #define L4_SIGN_EXT_MASK        (0x1FFFFUL << 47)
56 
57 #define LOW_ADDR                (0x1UL << 30)
58 #define HIGH_ADDR               (0x3UL << 48)
59 
60 #define MALLOC_LEN              32
61 
62 #define PAGE_SIZE               (4 << 10)
63 
64 #define STACK_SIZE		65536
65 
66 #define barrier() ({						\
67 		   __asm__ __volatile__("" : : : "memory");	\
68 })
69 
70 #define URING_QUEUE_SZ 1
71 #define URING_BLOCK_SZ 2048
72 
73 /* Pasid test define */
74 #define LAM_CMD_BIT 0x1
75 #define PAS_CMD_BIT 0x2
76 #define SVA_CMD_BIT 0x4
77 
78 #define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0))
79 
80 struct testcases {
81 	unsigned int later;
82 	int expected; /* 2: SIGSEGV Error; 1: other errors */
83 	unsigned long lam;
84 	uint64_t addr;
85 	uint64_t cmd;
86 	int (*test_func)(struct testcases *test);
87 	const char *msg;
88 };
89 
90 /* Used by CQ of uring, source file handler and file's size */
91 struct file_io {
92 	int file_fd;
93 	off_t file_sz;
94 	struct iovec iovecs[];
95 };
96 
97 struct io_uring_queue {
98 	unsigned int *head;
99 	unsigned int *tail;
100 	unsigned int *ring_mask;
101 	unsigned int *ring_entries;
102 	unsigned int *flags;
103 	unsigned int *array;
104 	union {
105 		struct io_uring_cqe *cqes;
106 		struct io_uring_sqe *sqes;
107 	} queue;
108 	size_t ring_sz;
109 };
110 
111 struct io_ring {
112 	int ring_fd;
113 	struct io_uring_queue sq_ring;
114 	struct io_uring_queue cq_ring;
115 };
116 
int tests_cnt;		/* number of test cases launched; reported to kselftest at exit */
jmp_buf segv_env;	/* sigsetjmp/siglongjmp target used by the SIGSEGV handler */
119 
segv_handler(int sig)120 static void segv_handler(int sig)
121 {
122 	ksft_print_msg("Get segmentation fault(%d).", sig);
123 
124 	siglongjmp(segv_env, 1);
125 }
126 
lam_is_available(void)127 static inline int lam_is_available(void)
128 {
129 	unsigned int cpuinfo[4];
130 	unsigned long bits = 0;
131 	int ret;
132 
133 	__cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
134 
135 	/* Check if cpu supports LAM */
136 	if (!(cpuinfo[0] & (1 << 26))) {
137 		ksft_print_msg("LAM is not supported!\n");
138 		return 0;
139 	}
140 
141 	/* Return 0 if CONFIG_ADDRESS_MASKING is not set */
142 	ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits);
143 	if (ret) {
144 		ksft_print_msg("LAM is disabled in the kernel!\n");
145 		return 0;
146 	}
147 
148 	return 1;
149 }
150 
la57_enabled(void)151 static inline int la57_enabled(void)
152 {
153 	int ret;
154 	void *p;
155 
156 	p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE,
157 		 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
158 
159 	ret = p == MAP_FAILED ? 0 : 1;
160 
161 	munmap(p, PAGE_SIZE);
162 	return ret;
163 }
164 
165 /*
166  * Set tagged address and read back untag mask.
167  * check if the untagged mask is expected.
168  *
169  * @return:
170  * 0: Set LAM mode successfully
171  * others: failed to set LAM
172  */
set_lam(unsigned long lam)173 static int set_lam(unsigned long lam)
174 {
175 	int ret = 0;
176 	uint64_t ptr = 0;
177 
178 	if (lam != LAM_U57_BITS && lam != LAM_NONE)
179 		return -1;
180 
181 	/* Skip check return */
182 	syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam);
183 
184 	/* Get untagged mask */
185 	syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr);
186 
187 	/* Check mask returned is expected */
188 	if (lam == LAM_U57_BITS)
189 		ret = (ptr != ~(LAM_U57_MASK));
190 	else if (lam == LAM_NONE)
191 		ret = (ptr != -1ULL);
192 
193 	return ret;
194 }
195 
get_default_tag_bits(void)196 static unsigned long get_default_tag_bits(void)
197 {
198 	pid_t pid;
199 	int lam = LAM_NONE;
200 	int ret = 0;
201 
202 	pid = fork();
203 	if (pid < 0) {
204 		perror("Fork failed.");
205 	} else if (pid == 0) {
206 		/* Set LAM mode in child process */
207 		if (set_lam(LAM_U57_BITS) == 0)
208 			lam = LAM_U57_BITS;
209 		else
210 			lam = LAM_NONE;
211 		exit(lam);
212 	} else {
213 		wait(&ret);
214 		lam = WEXITSTATUS(ret);
215 	}
216 
217 	return lam;
218 }
219 
220 /*
221  * Set tagged address and read back untag mask.
222  * check if the untag mask is expected.
223  */
get_lam(void)224 static int get_lam(void)
225 {
226 	uint64_t ptr = 0;
227 	int ret = -1;
228 	/* Get untagged mask */
229 	if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1)
230 		return -1;
231 
232 	/* Check mask returned is expected */
233 	if (ptr == ~(LAM_U57_MASK))
234 		ret = LAM_U57_BITS;
235 	else if (ptr == -1ULL)
236 		ret = LAM_NONE;
237 
238 
239 	return ret;
240 }
241 
242 /* According to LAM mode, set metadata in high bits */
set_metadata(uint64_t src,unsigned long lam)243 static uint64_t set_metadata(uint64_t src, unsigned long lam)
244 {
245 	uint64_t metadata;
246 
247 	srand(time(NULL));
248 
249 	switch (lam) {
250 	case LAM_U57_BITS: /* Set metadata in bits 62:57 */
251 		/* Get a random non-zero value as metadata */
252 		metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57;
253 		metadata |= (src & ~(LAM_U57_MASK));
254 		break;
255 	default:
256 		metadata = src;
257 		break;
258 	}
259 
260 	return metadata;
261 }
262 
263 /*
264  * Set metadata in user pointer, compare new pointer with original pointer.
265  * both pointers should point to the same address.
266  *
267  * @return:
268  * 0: value on the pointer with metadata and value on original are same
269  * 1: not same.
270  */
/*
 * Write through the plain pointer, derive a tagged alias with
 * set_metadata(), write through the alias, then compare: with LAM both
 * pointers must address the same memory.
 *
 * @return:
 * 0: value on the pointer with metadata and value on original are same
 * 1: not same.
 */
static int handle_lam_test(void *src, unsigned int lam)
{
	char *tagged;

	strcpy((char *)src, "USER POINTER");

	tagged = (char *)set_metadata((uint64_t)src, lam);
	if (tagged == (char *)src)
		return 0;

	/* Copy a string into the pointer with metadata */
	strcpy(tagged, "METADATA POINTER");

	return strcmp((char *)src, tagged) != 0;
}
286 
287 
handle_max_bits(struct testcases * test)288 int handle_max_bits(struct testcases *test)
289 {
290 	unsigned long exp_bits = get_default_tag_bits();
291 	unsigned long bits = 0;
292 
293 	if (exp_bits != LAM_NONE)
294 		exp_bits = LAM_U57_BITS;
295 
296 	/* Get LAM max tag bits */
297 	if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1)
298 		return 1;
299 
300 	return (exp_bits != bits);
301 }
302 
303 /*
304  * Test lam feature through dereference pointer get from malloc.
305  * @return 0: Pass test. 1: Get failure during test 2: Get SIGSEGV
306  */
handle_malloc(struct testcases * test)307 static int handle_malloc(struct testcases *test)
308 {
309 	char *ptr = NULL;
310 	int ret = 0;
311 
312 	if (test->later == 0 && test->lam != 0)
313 		if (set_lam(test->lam) == -1)
314 			return 1;
315 
316 	ptr = (char *)malloc(MALLOC_LEN);
317 	if (ptr == NULL) {
318 		perror("malloc() failure\n");
319 		return 1;
320 	}
321 
322 	/* Set signal handler */
323 	if (sigsetjmp(segv_env, 1) == 0) {
324 		signal(SIGSEGV, segv_handler);
325 		ret = handle_lam_test(ptr, test->lam);
326 	} else {
327 		ret = 2;
328 	}
329 
330 	if (test->later != 0 && test->lam != 0)
331 		if (set_lam(test->lam) == -1 && ret == 0)
332 			ret = 1;
333 
334 	free(ptr);
335 
336 	return ret;
337 }
338 
handle_mmap(struct testcases * test)339 static int handle_mmap(struct testcases *test)
340 {
341 	void *ptr;
342 	unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
343 	int ret = 0;
344 
345 	if (test->later == 0 && test->lam != 0)
346 		if (set_lam(test->lam) != 0)
347 			return 1;
348 
349 	ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE,
350 		   flags, -1, 0);
351 	if (ptr == MAP_FAILED) {
352 		if (test->addr == HIGH_ADDR)
353 			if (!la57_enabled())
354 				return 3; /* unsupport LA57 */
355 		return 1;
356 	}
357 
358 	if (test->later != 0 && test->lam != 0)
359 		if (set_lam(test->lam) != 0)
360 			ret = 1;
361 
362 	if (ret == 0) {
363 		if (sigsetjmp(segv_env, 1) == 0) {
364 			signal(SIGSEGV, segv_handler);
365 			ret = handle_lam_test(ptr, test->lam);
366 		} else {
367 			ret = 2;
368 		}
369 	}
370 
371 	munmap(ptr, PAGE_SIZE);
372 	return ret;
373 }
374 
/*
 * Pass a tagged pointer into the uname() syscall and check how the
 * kernel handles it under the configured LAM mode.
 * @return 0: pass, 1: failure, 2: SIGSEGV.
 */
static int handle_syscall(struct testcases *test)
{
	struct utsname unme, *pu;
	int ret = 0;

	/* "later == 0": enable LAM before issuing the tagged syscall */
	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			return 1;

	if (sigsetjmp(segv_env, 1) == 0) {
		signal(SIGSEGV, segv_handler);
		/* Hand the kernel a pointer with metadata in the high bits */
		pu = (struct utsname *)set_metadata((uint64_t)&unme, test->lam);
		ret = uname(pu);
		if (ret < 0)
			ret = 1;
	} else {
		ret = 2;
	}

	/*
	 * "later != 0" negative case: the tagged uname() above ran without
	 * LAM enabled, so it should have failed; if it succeeded (ret == 0)
	 * the test is marked failed.
	 * NOTE(review): this compares set_lam() against -1 while
	 * handle_malloc() compares with == -1 — presumably any non-invalid
	 * prctl outcome counts here; confirm against selftest history.
	 */
	if (test->later != 0 && test->lam != 0)
		if (set_lam(test->lam) != -1 && ret == 0)
			ret = 1;

	return ret;
}
400 
get_user_syscall(struct testcases * test)401 static int get_user_syscall(struct testcases *test)
402 {
403 	uint64_t ptr_address, bitmask;
404 	int fd, ret = 0;
405 	void *ptr;
406 
407 	if (la57_enabled()) {
408 		bitmask = L5_SIGN_EXT_MASK;
409 		ptr_address = HIGH_ADDR;
410 	} else {
411 		bitmask = L4_SIGN_EXT_MASK;
412 		ptr_address = LOW_ADDR;
413 	}
414 
415 	ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE,
416 		   MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
417 
418 	if (ptr == MAP_FAILED) {
419 		perror("failed to map byte to pass into get_user");
420 		return 1;
421 	}
422 
423 	if (set_lam(test->lam) != 0) {
424 		ret = 2;
425 		goto error;
426 	}
427 
428 	fd = memfd_create("lam_ioctl", 0);
429 	if (fd == -1) {
430 		munmap(ptr, PAGE_SIZE);
431 		exit(EXIT_FAILURE);
432 	}
433 
434 	switch (test->later) {
435 	case GET_USER_USER:
436 		/* Control group - properly tagged user pointer */
437 		ptr = (void *)set_metadata((uint64_t)ptr, test->lam);
438 		break;
439 	case GET_USER_KERNEL_TOP:
440 		/* Kernel address with top bit cleared */
441 		bitmask &= (bitmask >> 1);
442 		ptr = (void *)((uint64_t)ptr | bitmask);
443 		break;
444 	case GET_USER_KERNEL_BOT:
445 		/* Kernel address with bottom sign-extension bit cleared */
446 		bitmask &= (bitmask << 1);
447 		ptr = (void *)((uint64_t)ptr | bitmask);
448 		break;
449 	case GET_USER_KERNEL:
450 		/* Try to pass a kernel address */
451 		ptr = (void *)((uint64_t)ptr | bitmask);
452 		break;
453 	default:
454 		printf("Invalid test case value passed!\n");
455 		break;
456 	}
457 
458 	/*
459 	 * Use FIOASYNC ioctl because it utilizes get_user() internally and is
460 	 * very non-invasive to the system. Pass differently tagged pointers to
461 	 * get_user() in order to verify that valid user pointers are going
462 	 * through and invalid kernel/non-canonical pointers are not.
463 	 */
464 	if (ioctl(fd, FIOASYNC, ptr) != 0)
465 		ret = 1;
466 
467 	close(fd);
468 error:
469 	munmap(ptr, PAGE_SIZE);
470 	return ret;
471 }
472 
/* Thin wrapper over the raw io_uring_setup(2) syscall */
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	long rc = syscall(__NR_io_uring_setup, entries, p);

	return (int)rc;
}
477 
/* Thin wrapper over the raw io_uring_enter(2) syscall (no signal mask) */
int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
{
	long rc = syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);

	return (int)rc;
}
482 
483 /* Init submission queue and completion queue */
mmap_io_uring(struct io_uring_params p,struct io_ring * s)484 int mmap_io_uring(struct io_uring_params p, struct io_ring *s)
485 {
486 	struct io_uring_queue *sring = &s->sq_ring;
487 	struct io_uring_queue *cring = &s->cq_ring;
488 
489 	sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int);
490 	cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
491 
492 	if (p.features & IORING_FEAT_SINGLE_MMAP) {
493 		if (cring->ring_sz > sring->ring_sz)
494 			sring->ring_sz = cring->ring_sz;
495 
496 		cring->ring_sz = sring->ring_sz;
497 	}
498 
499 	void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE,
500 			    MAP_SHARED | MAP_POPULATE, s->ring_fd,
501 			    IORING_OFF_SQ_RING);
502 
503 	if (sq_ptr == MAP_FAILED) {
504 		perror("sub-queue!");
505 		return 1;
506 	}
507 
508 	void *cq_ptr = sq_ptr;
509 
510 	if (!(p.features & IORING_FEAT_SINGLE_MMAP)) {
511 		cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE,
512 			      MAP_SHARED | MAP_POPULATE, s->ring_fd,
513 			      IORING_OFF_CQ_RING);
514 		if (cq_ptr == MAP_FAILED) {
515 			perror("cpl-queue!");
516 			munmap(sq_ptr, sring->ring_sz);
517 			return 1;
518 		}
519 	}
520 
521 	sring->head = sq_ptr + p.sq_off.head;
522 	sring->tail = sq_ptr + p.sq_off.tail;
523 	sring->ring_mask = sq_ptr + p.sq_off.ring_mask;
524 	sring->ring_entries = sq_ptr + p.sq_off.ring_entries;
525 	sring->flags = sq_ptr + p.sq_off.flags;
526 	sring->array = sq_ptr + p.sq_off.array;
527 
528 	/* Map a queue as mem map */
529 	s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
530 				     PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
531 				     s->ring_fd, IORING_OFF_SQES);
532 	if (s->sq_ring.queue.sqes == MAP_FAILED) {
533 		munmap(sq_ptr, sring->ring_sz);
534 		if (sq_ptr != cq_ptr) {
535 			ksft_print_msg("failed to mmap uring queue!");
536 			munmap(cq_ptr, cring->ring_sz);
537 			return 1;
538 		}
539 	}
540 
541 	cring->head = cq_ptr + p.cq_off.head;
542 	cring->tail = cq_ptr + p.cq_off.tail;
543 	cring->ring_mask = cq_ptr + p.cq_off.ring_mask;
544 	cring->ring_entries = cq_ptr + p.cq_off.ring_entries;
545 	cring->queue.cqes = cq_ptr + p.cq_off.cqes;
546 
547 	return 0;
548 }
549 
550 /* Init io_uring queues */
setup_io_uring(struct io_ring * s)551 int setup_io_uring(struct io_ring *s)
552 {
553 	struct io_uring_params para;
554 
555 	memset(&para, 0, sizeof(para));
556 	s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, &para);
557 	if (s->ring_fd < 0)
558 		return 1;
559 
560 	return mmap_io_uring(para, s);
561 }
562 
563 /*
564  * Get data from completion queue. the data buffer saved the file data
565  * return 0: success; others: error;
566  */
handle_uring_cq(struct io_ring * s)567 int handle_uring_cq(struct io_ring *s)
568 {
569 	struct file_io *fi = NULL;
570 	struct io_uring_queue *cring = &s->cq_ring;
571 	struct io_uring_cqe *cqe;
572 	unsigned int head;
573 	off_t len = 0;
574 
575 	head = *cring->head;
576 
577 	do {
578 		barrier();
579 		if (head == *cring->tail)
580 			break;
581 		/* Get the entry */
582 		cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask];
583 		fi = (struct file_io *)cqe->user_data;
584 		if (cqe->res < 0)
585 			break;
586 
587 		int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
588 
589 		for (int i = 0; i < blocks; i++)
590 			len += fi->iovecs[i].iov_len;
591 
592 		head++;
593 	} while (1);
594 
595 	*cring->head = head;
596 	barrier();
597 
598 	return (len != fi->file_sz);
599 }
600 
601 /*
602  * Submit squeue. specify via IORING_OP_READV.
603  * the buffer need to be set metadata according to LAM mode
604  */
/*
 * Submit squeue. specify via IORING_OP_READV.
 * the buffer need to be set metadata according to LAM mode
 */
int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam)
{
	int file_fd = fi->file_fd;
	struct io_uring_queue *sring = &ring->sq_ring;
	unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0;
	struct io_uring_sqe *sqe;

	off_t remain = fi->file_sz;
	int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;

	/*
	 * Allocate one aligned buffer per block and store TAGGED pointers
	 * in the iovecs — passing tagged iov_base values through the kernel
	 * is the actual LAM exercise here.
	 */
	while (remain) {
		off_t bytes = remain;
		void *buf;

		if (bytes > URING_BLOCK_SZ)
			bytes = URING_BLOCK_SZ;

		fi->iovecs[cur_block].iov_len = bytes;

		if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ))
			return 1;

		fi->iovecs[cur_block].iov_base = (void *)set_metadata((uint64_t)buf, lam);
		remain -= bytes;
		cur_block++;
	}

	/* Claim the next submission-queue slot */
	next_tail = *sring->tail;
	tail = next_tail;
	next_tail++;

	barrier();

	index = tail & *ring->sq_ring.ring_mask;

	/* One READV SQE covering every iovec of the file */
	sqe = &ring->sq_ring.queue.sqes[index];
	sqe->fd = file_fd;
	sqe->flags = 0;
	sqe->opcode = IORING_OP_READV;
	sqe->addr = (unsigned long)fi->iovecs;
	sqe->len = blocks;
	sqe->off = 0;
	sqe->user_data = (uint64_t)fi;

	sring->array[index] = index;
	tail = next_tail;

	/* Publish the new tail so the kernel can see the SQE */
	if (*sring->tail != tail) {
		*sring->tail = tail;
		barrier();
	}

	/* Submit and wait for at least one completion */
	if (sys_uring_enter(ring->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0)
		return 1;

	return 0;
}
662 
663 /*
664  * Test LAM in async I/O and io_uring, read current binery through io_uring
665  * Set metadata in pointers to iovecs buffer.
666  */
do_uring(unsigned long lam)667 int do_uring(unsigned long lam)
668 {
669 	struct io_ring *ring;
670 	struct file_io *fi;
671 	struct stat st;
672 	int ret = 1;
673 	char path[PATH_MAX] = {0};
674 
675 	/* get current process path */
676 	if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
677 		return 1;
678 
679 	int file_fd = open(path, O_RDONLY);
680 
681 	if (file_fd < 0)
682 		return 1;
683 
684 	if (fstat(file_fd, &st) < 0)
685 		return 1;
686 
687 	off_t file_sz = st.st_size;
688 
689 	int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
690 
691 	fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
692 	if (!fi)
693 		return 1;
694 
695 	fi->file_sz = file_sz;
696 	fi->file_fd = file_fd;
697 
698 	ring = malloc(sizeof(*ring));
699 	if (!ring) {
700 		free(fi);
701 		return 1;
702 	}
703 
704 	memset(ring, 0, sizeof(struct io_ring));
705 
706 	if (setup_io_uring(ring))
707 		goto out;
708 
709 	if (handle_uring_sq(ring, fi, lam))
710 		goto out;
711 
712 	ret = handle_uring_cq(ring);
713 
714 out:
715 	free(ring);
716 
717 	for (int i = 0; i < blocks; i++) {
718 		if (fi->iovecs[i].iov_base) {
719 			uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base);
720 
721 			switch (lam) {
722 			case LAM_U57_BITS: /* Clear bits 62:57 */
723 				addr = (addr & ~(LAM_U57_MASK));
724 				break;
725 			}
726 			free((void *)addr);
727 			fi->iovecs[i].iov_base = NULL;
728 		}
729 	}
730 
731 	free(fi);
732 
733 	return ret;
734 }
735 
handle_uring(struct testcases * test)736 int handle_uring(struct testcases *test)
737 {
738 	int ret = 0;
739 
740 	if (test->later == 0 && test->lam != 0)
741 		if (set_lam(test->lam) != 0)
742 			return 1;
743 
744 	if (sigsetjmp(segv_env, 1) == 0) {
745 		signal(SIGSEGV, segv_handler);
746 		ret = do_uring(test->lam);
747 	} else {
748 		ret = 2;
749 	}
750 
751 	return ret;
752 }
753 
fork_test(struct testcases * test)754 static int fork_test(struct testcases *test)
755 {
756 	int ret, child_ret;
757 	pid_t pid;
758 
759 	pid = fork();
760 	if (pid < 0) {
761 		perror("Fork failed.");
762 		ret = 1;
763 	} else if (pid == 0) {
764 		ret = test->test_func(test);
765 		exit(ret);
766 	} else {
767 		wait(&child_ret);
768 		ret = WEXITSTATUS(child_ret);
769 	}
770 
771 	return ret;
772 }
773 
handle_execve(struct testcases * test)774 static int handle_execve(struct testcases *test)
775 {
776 	int ret, child_ret;
777 	int lam = test->lam;
778 	pid_t pid;
779 
780 	pid = fork();
781 	if (pid < 0) {
782 		perror("Fork failed.");
783 		ret = 1;
784 	} else if (pid == 0) {
785 		char path[PATH_MAX] = {0};
786 
787 		/* Set LAM mode in parent process */
788 		if (set_lam(lam) != 0)
789 			return 1;
790 
791 		/* Get current binary's path and the binary was run by execve */
792 		if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
793 			exit(-1);
794 
795 		/* run binary to get LAM mode and return to parent process */
796 		if (execlp(path, path, "-t 0x0", NULL) < 0) {
797 			perror("error on exec");
798 			exit(-1);
799 		}
800 	} else {
801 		wait(&child_ret);
802 		ret = WEXITSTATUS(child_ret);
803 		if (ret != LAM_NONE)
804 			return 1;
805 	}
806 
807 	return 0;
808 }
809 
handle_inheritance(struct testcases * test)810 static int handle_inheritance(struct testcases *test)
811 {
812 	int ret, child_ret;
813 	int lam = test->lam;
814 	pid_t pid;
815 
816 	/* Set LAM mode in parent process */
817 	if (set_lam(lam) != 0)
818 		return 1;
819 
820 	pid = fork();
821 	if (pid < 0) {
822 		perror("Fork failed.");
823 		return 1;
824 	} else if (pid == 0) {
825 		/* Set LAM mode in parent process */
826 		int child_lam = get_lam();
827 
828 		exit(child_lam);
829 	} else {
830 		wait(&child_ret);
831 		ret = WEXITSTATUS(child_ret);
832 
833 		if (lam != ret)
834 			return 1;
835 	}
836 
837 	return 0;
838 }
839 
/* clone() entry point: report the LAM mode visible inside the thread */
static int thread_fn_get_lam(void *arg)
{
	(void)arg;

	return get_lam();
}
844 
thread_fn_set_lam(void * arg)845 static int thread_fn_set_lam(void *arg)
846 {
847 	struct testcases *test = arg;
848 
849 	return set_lam(test->lam);
850 }
851 
handle_thread(struct testcases * test)852 static int handle_thread(struct testcases *test)
853 {
854 	char stack[STACK_SIZE];
855 	int ret, child_ret;
856 	int lam = 0;
857 	pid_t pid;
858 
859 	/* Set LAM mode in parent process */
860 	if (!test->later) {
861 		lam = test->lam;
862 		if (set_lam(lam) != 0)
863 			return 1;
864 	}
865 
866 	pid = clone(thread_fn_get_lam, stack + STACK_SIZE,
867 		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL);
868 	if (pid < 0) {
869 		perror("Clone failed.");
870 		return 1;
871 	}
872 
873 	waitpid(pid, &child_ret, 0);
874 	ret = WEXITSTATUS(child_ret);
875 
876 	if (lam != ret)
877 		return 1;
878 
879 	if (test->later) {
880 		if (set_lam(test->lam) != 0)
881 			return 1;
882 	}
883 
884 	return 0;
885 }
886 
handle_thread_enable(struct testcases * test)887 static int handle_thread_enable(struct testcases *test)
888 {
889 	char stack[STACK_SIZE];
890 	int ret, child_ret;
891 	int lam = test->lam;
892 	pid_t pid;
893 
894 	pid = clone(thread_fn_set_lam, stack + STACK_SIZE,
895 		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test);
896 	if (pid < 0) {
897 		perror("Clone failed.");
898 		return 1;
899 	}
900 
901 	waitpid(pid, &child_ret, 0);
902 	ret = WEXITSTATUS(child_ret);
903 
904 	if (lam != ret)
905 		return 1;
906 
907 	return 0;
908 }
run_test(struct testcases * test,int count)909 static void run_test(struct testcases *test, int count)
910 {
911 	int i, ret = 0;
912 
913 	for (i = 0; i < count; i++) {
914 		struct testcases *t = test + i;
915 
916 		/* fork a process to run test case */
917 		tests_cnt++;
918 		ret = fork_test(t);
919 
920 		/* return 3 is not support LA57, the case should be skipped */
921 		if (ret == 3) {
922 			ksft_test_result_skip("%s", t->msg);
923 			continue;
924 		}
925 
926 		if (ret != 0)
927 			ret = (t->expected == ret);
928 		else
929 			ret = !(t->expected);
930 
931 		ksft_test_result(ret, "%s", t->msg);
932 	}
933 }
934 
/* io_uring cases: tagged iovec buffers with LAM on, and without (negative) */
static struct testcases uring_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_uring,
		.msg = "URING: LAM_U57. Dereferencing pointer with metadata\n",
	},
	{
		.later = 1,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_uring,
		.msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
};

/* malloc cases: tagged heap pointer dereference; negative expects SIGSEGV (2) */
static struct testcases malloc_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_malloc,
		.msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n",
	},
	{
		.later = 1,
		.expected = 2,
		.lam = LAM_U57_BITS,
		.test_func = handle_malloc,
		.msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
};

/* max-tag-bits case: ARCH_GET_MAX_TAG_BITS vs. the default child LAM mode */
static struct testcases bits_cases[] = {
	{
		.test_func = handle_max_bits,
		.msg = "BITS: Check default tag bits\n",
	},
};

/*
 * Syscall cases: tagged pointers through uname() and get_user() (FIOASYNC).
 * For the get_user cases, .later selects the GET_USER_* pointer variant.
 */
static struct testcases syscall_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_syscall,
		.msg = "SYSCALL: LAM_U57. syscall with metadata\n",
	},
	{
		.later = 1,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_syscall,
		.msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
	{
		.later = GET_USER_USER,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n",
	},
	{
		.later = GET_USER_KERNEL_TOP,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n",
	},
	{
		.later = GET_USER_KERNEL_BOT,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n",
	},
	{
		.later = GET_USER_KERNEL,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n",
	},
};

/* mmap cases: LAM enabled before/after mapping low and high (LA57) addresses */
static struct testcases mmap_cases[] = {
	{
		.later = 1,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = HIGH_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First mmap high address, then set LAM_U57.\n",
	},
	{
		.later = 0,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = HIGH_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First LAM_U57, then High address.\n",
	},
	{
		.later = 0,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = LOW_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First LAM_U57, then Low address.\n",
	},
};

/* Inheritance cases: LAM propagation across fork/clone/execve */
static struct testcases inheritance_cases[] = {
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_inheritance,
		.msg = "FORK: LAM_U57, child process should get LAM mode same as parent\n",
	},
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread,
		.msg = "THREAD: LAM_U57, child thread should get LAM mode same as parent\n",
	},
	{
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread_enable,
		.msg = "THREAD: [NEGATIVE] Enable LAM in child.\n",
	},
	{
		.expected = 1,
		.later = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread,
		.msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n",
	},
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_execve,
		.msg = "EXECVE: LAM_U57, child process should get disabled LAM mode\n",
	},
};
1077 
cmd_help(void)1078 static void cmd_help(void)
1079 {
1080 	printf("usage: lam [-h] [-t test list]\n");
1081 	printf("\t-t test list: run tests specified in the test list, default:0x%x\n", TEST_MASK);
1082 	printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit;\n");
1083 	printf("\t-h: help\n");
1084 }
1085 
1086 /* Check for file existence */
/* Check for file existence: 1 when stat() succeeds, 0 otherwise */
uint8_t file_Exists(const char *fileName)
{
	struct stat sb;

	return stat(fileName, &sb) == 0 ? 1 : 0;
}
1095 
1096 /* Sysfs idxd files */
const char *dsa_configs[] = {
	"echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id",
	"echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode",
	"echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority",
	"echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size",
	"echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold",
	"echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type",
	"echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name",
	"echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id",
	"echo dsa0 > /sys/bus/dsa/drivers/idxd/bind",
	/* bind files and devices, generated a device file in /dev */
	"echo wq0.1 > /sys/bus/dsa/drivers/user/bind",
};

/* DSA device file created once the sysfs configuration above succeeds */
const char *dsaDeviceFile = "/dev/dsa/wq0.1";
/* Sysfs knob reporting whether PASID support is enabled for dsa0 */
const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled";
1115 
1116 /*
1117  * DSA depends on kernel cmdline "intel_iommu=on,sm_on"
1118  * return pasid_enabled (0: disable 1:enable)
1119  */
Check_DSA_Kernel_Setting(void)1120 int Check_DSA_Kernel_Setting(void)
1121 {
1122 	char command[256] = "";
1123 	char buf[256] = "";
1124 	char *ptr;
1125 	int rv = -1;
1126 
1127 	snprintf(command, sizeof(command) - 1, "cat %s", dsaPasidEnable);
1128 
1129 	FILE *cmd = popen(command, "r");
1130 
1131 	if (cmd) {
1132 		while (fgets(buf, sizeof(buf) - 1, cmd) != NULL);
1133 
1134 		pclose(cmd);
1135 		rv = strtol(buf, &ptr, 16);
1136 	}
1137 
1138 	return rv;
1139 }
1140 
1141 /*
1142  * Config DSA's sysfs files as shared DSA's WQ.
1143  * Generated a device file /dev/dsa/wq0.1
1144  * Return:  0 OK; 1 Failed; 3 Skip(SVA disabled).
1145  */
Dsa_Init_Sysfs(void)1146 int Dsa_Init_Sysfs(void)
1147 {
1148 	uint len = ARRAY_SIZE(dsa_configs);
1149 	const char **p = dsa_configs;
1150 
1151 	if (file_Exists(dsaDeviceFile) == 1)
1152 		return 0;
1153 
1154 	/* check the idxd driver */
1155 	if (file_Exists(dsaPasidEnable) != 1) {
1156 		printf("Please make sure idxd driver was loaded\n");
1157 		return 3;
1158 	}
1159 
1160 	/* Check SVA feature */
1161 	if (Check_DSA_Kernel_Setting() != 1) {
1162 		printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n");
1163 		return 3;
1164 	}
1165 
1166 	/* Check the idxd device file on /dev/dsa/ */
1167 	for (int i = 0; i < len; i++) {
1168 		if (system(p[i]))
1169 			return 1;
1170 	}
1171 
1172 	/* After config, /dev/dsa/wq0.1 should be generated */
1173 	return (file_Exists(dsaDeviceFile) != 1);
1174 }
1175 
1176 /*
1177  * Open DSA device file, triger API: iommu_sva_alloc_pasid
1178  */
/*
 * Open DSA device file, triger API: iommu_sva_alloc_pasid
 * Returns the mmap()ed work-queue portal, or MAP_FAILED on error.
 *
 * NOTE(review): the opened fd is never closed here; presumably it must
 * stay open so the PASID binding outlives this call — confirm against
 * the idxd driver before adding a close().
 */
void *allocate_dsa_pasid(void)
{
	int fd;
	void *wq;

	fd = open(dsaDeviceFile, O_RDWR);
	if (fd < 0) {
		perror("open");
		return MAP_FAILED;
	}

	/* Write-only portal mapping; triggers PASID allocation in the driver */
	wq = mmap(NULL, 0x1000, PROT_WRITE,
			   MAP_SHARED | MAP_POPULATE, fd, 0);
	if (wq == MAP_FAILED)
		perror("mmap");

	return wq;
}
1197 
set_force_svm(void)1198 int set_force_svm(void)
1199 {
1200 	int ret = 0;
1201 
1202 	ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA);
1203 
1204 	return ret;
1205 }
1206 
/*
 * Execute the LAM / PASID-allocate / force-SVA operations in the order
 * encoded in test->cmd (one command bit per 4-bit slot; see PAS_CMD and
 * the *_CMD_BIT defines). Each operation may run at most once; running
 * one twice, or any operation failing, fails the test.
 * Return: 0 pass, 1 fail, 3 skip (DSA/SVA unavailable).
 */
int handle_pasid(struct testcases *test)
{
	uint tmp = test->cmd;
	uint runed = 0x0;	/* bitmask of operations already executed */
	int ret = 0;
	void *wq = NULL;

	ret = Dsa_Init_Sysfs();
	if (ret != 0)
		return ret;

	/* Three command slots, consumed lowest nibble first */
	for (int i = 0; i < 3; i++) {
		int err = 0;

		if (tmp & 0x1) {
			/* run set lam mode*/
			if ((runed & 0x1) == 0)	{
				err = set_lam(LAM_U57_BITS);
				runed = runed | 0x1;
			} else
				err = 1;
		} else if (tmp & 0x4) {
			/* run force svm */
			if ((runed & 0x4) == 0)	{
				err = set_force_svm();
				runed = runed | 0x4;
			} else
				err = 1;
		} else if (tmp & 0x2) {
			/* run allocate pasid */
			if ((runed & 0x2) == 0) {
				runed = runed | 0x2;
				wq = allocate_dsa_pasid();
				if (wq == MAP_FAILED)
					err = 1;
			} else
				err = 1;
		}

		ret = ret + err;
		if (ret > 0)
			break;

		/* Advance to the next command nibble */
		tmp = tmp >> 4;
	}

	if (wq != MAP_FAILED && wq != NULL)
		if (munmap(wq, 0x1000))
			printf("munmap failed %d\n", errno);

	/* All three operations must have run exactly once each */
	if (runed != 0x7)
		ret = 1;

	return (ret != 0);
}
1262 
/*
 * The PASID tests depend on idxd and SVA; the kernel must enable the
 * IOMMU and scalable mode (kernel command line: intel_iommu=on,sm_on).
 */
/*
 * Each entry runs LAM, SVA, and PASID allocation in the order given to
 * PAS_CMD(); expected = 1 marks the "[Negative]" orderings that are
 * supposed to fail (handle_pasid() returns non-zero for them).
 */
static struct testcases pasid_cases[] = {
	{
		.expected = 1,
		.cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute LAM, SVA, PASID in sequence\n",
	},
	{
		.expected = 1,
		.cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute PASID, SVA, LAM in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute SVA, LAM, PASID in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute SVA, PASID, LAM in sequence\n",
	},
};
1305 
main(int argc,char ** argv)1306 int main(int argc, char **argv)
1307 {
1308 	int c = 0;
1309 	unsigned int tests = TEST_MASK;
1310 
1311 	tests_cnt = 0;
1312 
1313 	if (!lam_is_available())
1314 		return KSFT_SKIP;
1315 
1316 	while ((c = getopt(argc, argv, "ht:")) != -1) {
1317 		switch (c) {
1318 		case 't':
1319 			tests = strtoul(optarg, NULL, 16);
1320 			if (tests && !(tests & TEST_MASK)) {
1321 				ksft_print_msg("Invalid argument!\n");
1322 				return -1;
1323 			}
1324 			break;
1325 		case 'h':
1326 			cmd_help();
1327 			return 0;
1328 		default:
1329 			ksft_print_msg("Invalid argument\n");
1330 			return -1;
1331 		}
1332 	}
1333 
1334 	/*
1335 	 * When tests is 0, it is not a real test case;
1336 	 * the option used by test case(execve) to check the lam mode in
1337 	 * process generated by execve, the process read back lam mode and
1338 	 * check with lam mode in parent process.
1339 	 */
1340 	if (!tests)
1341 		return (get_lam());
1342 
1343 	/* Run test cases */
1344 	if (tests & FUNC_MALLOC)
1345 		run_test(malloc_cases, ARRAY_SIZE(malloc_cases));
1346 
1347 	if (tests & FUNC_BITS)
1348 		run_test(bits_cases, ARRAY_SIZE(bits_cases));
1349 
1350 	if (tests & FUNC_MMAP)
1351 		run_test(mmap_cases, ARRAY_SIZE(mmap_cases));
1352 
1353 	if (tests & FUNC_SYSCALL)
1354 		run_test(syscall_cases, ARRAY_SIZE(syscall_cases));
1355 
1356 	if (tests & FUNC_URING)
1357 		run_test(uring_cases, ARRAY_SIZE(uring_cases));
1358 
1359 	if (tests & FUNC_INHERITE)
1360 		run_test(inheritance_cases, ARRAY_SIZE(inheritance_cases));
1361 
1362 	if (tests & FUNC_PASID)
1363 		run_test(pasid_cases, ARRAY_SIZE(pasid_cases));
1364 
1365 	ksft_set_plan(tests_cnt);
1366 
1367 	ksft_exit_pass();
1368 }
1369