1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <stdio.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <sys/syscall.h>
7 #include <sys/ioctl.h>
8 #include <time.h>
9 #include <signal.h>
10 #include <setjmp.h>
11 #include <sys/mman.h>
12 #include <sys/utsname.h>
13 #include <sys/wait.h>
14 #include <sys/stat.h>
15 #include <fcntl.h>
16 #include <inttypes.h>
17 #include <sched.h>
18
19 #include <sys/uio.h>
20 #include <linux/io_uring.h>
21 #include "kselftest.h"
22
23 #ifndef __x86_64__
24 # error This test is 64-bit only
25 #endif
26
27 /* LAM modes, these definitions were copied from kernel code */
28 #define LAM_NONE 0
29 #define LAM_U57_BITS 6
30
31 #define LAM_U57_MASK (0x3fULL << 57)
32 /* arch prctl for LAM */
33 #define ARCH_GET_UNTAG_MASK 0x4001
34 #define ARCH_ENABLE_TAGGED_ADDR 0x4002
35 #define ARCH_GET_MAX_TAG_BITS 0x4003
36 #define ARCH_FORCE_TAGGED_SVA 0x4004
37
38 /* Specified test function bits */
39 #define FUNC_MALLOC 0x1
40 #define FUNC_BITS 0x2
41 #define FUNC_MMAP 0x4
42 #define FUNC_SYSCALL 0x8
43 #define FUNC_URING 0x10
44 #define FUNC_INHERITE 0x20
45 #define FUNC_PASID 0x40
46
47 /* get_user() pointer test cases */
48 #define GET_USER_USER 0
49 #define GET_USER_KERNEL_TOP 1
50 #define GET_USER_KERNEL_BOT 2
51 #define GET_USER_KERNEL 3
52
53 #define TEST_MASK 0x7f
54 #define L5_SIGN_EXT_MASK (0xFFUL << 56)
55 #define L4_SIGN_EXT_MASK (0x1FFFFUL << 47)
56
57 #define LOW_ADDR (0x1UL << 30)
58 #define HIGH_ADDR (0x3UL << 48)
59
60 #define MALLOC_LEN 32
61
62 #define PAGE_SIZE (4 << 10)
63
64 #define STACK_SIZE 65536
65
66 #define barrier() ({ \
67 __asm__ __volatile__("" : : : "memory"); \
68 })
69
70 #define URING_QUEUE_SZ 1
71 #define URING_BLOCK_SZ 2048
72
73 /* Pasid test define */
74 #define LAM_CMD_BIT 0x1
75 #define PAS_CMD_BIT 0x2
76 #define SVA_CMD_BIT 0x4
77
78 #define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0))
79
/* Description of one LAM test case; run by run_test() in a forked child. */
struct testcases {
	unsigned int later;	/* 0: enable LAM before the access; non-zero: after (also reused as GET_USER_* case id) */
	int expected; /* 2: SIGSEGV Error; 1: other errors */
	unsigned long lam;	/* LAM mode to enable: LAM_U57_BITS or LAM_NONE */
	uint64_t addr;		/* fixed mapping address for the mmap tests */
	uint64_t cmd;		/* PAS_CMD() step sequence for the pasid tests */
	int (*test_func)(struct testcases *test);	/* test body */
	const char *msg;	/* kselftest result message */
};
89
/* Used by CQ of uring, source file handler and file's size */
struct file_io {
	int file_fd;		/* fd of the file read through io_uring */
	off_t file_sz;		/* total file size in bytes */
	struct iovec iovecs[];	/* one URING_BLOCK_SZ buffer per block (flexible array) */
};
96
/* Userspace view of one io_uring ring: pointers into the mmap'd ring area. */
struct io_uring_queue {
	unsigned int *head;		/* ring head index (kernel/user updated) */
	unsigned int *tail;		/* ring tail index */
	unsigned int *ring_mask;	/* index mask (entries - 1) */
	unsigned int *ring_entries;	/* number of ring entries */
	unsigned int *flags;		/* SQ-ring flags (SQ only) */
	unsigned int *array;		/* SQE index array (SQ only) */
	union {
		struct io_uring_cqe *cqes;	/* CQ entries when used as completion ring */
		struct io_uring_sqe *sqes;	/* SQ entries when used as submission ring */
	} queue;
	size_t ring_sz;			/* size of the mmap'd ring region */
};
110
/* One io_uring instance: its fd plus submission and completion rings. */
struct io_ring {
	int ring_fd;
	struct io_uring_queue sq_ring;
	struct io_uring_queue cq_ring;
};
116
int tests_cnt;		/* number of cases executed; reported via ksft_set_plan() */
jmp_buf segv_env;	/* sigsetjmp() target that segv_handler() jumps back to */
119
/* SIGSEGV handler: log the fault and resume at the test's sigsetjmp() point. */
static void segv_handler(int sig)
{
	ksft_print_msg("Get segmentation fault(%d).", sig);

	siglongjmp(segv_env, 1);
}
126
/*
 * Return 1 when the CPU advertises LAM and the kernel exposes the LAM
 * arch_prctl interface, 0 otherwise.
 */
static inline int lam_is_available(void)
{
	unsigned int cpuinfo[4];
	unsigned long bits = 0;
	int ret;

	/* CPUID.(EAX=07H,ECX=01H):EAX bit 26 is the LAM feature flag. */
	__cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);

	/* Check if cpu supports LAM */
	if (!(cpuinfo[0] & (1 << 26))) {
		ksft_print_msg("LAM is not supported!\n");
		return 0;
	}

	/* Return 0 if CONFIG_ADDRESS_MASKING is not set */
	ret = syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits);
	if (ret) {
		ksft_print_msg("LAM is disabled in the kernel!\n");
		return 0;
	}

	return 1;
}
150
la57_enabled(void)151 static inline int la57_enabled(void)
152 {
153 int ret;
154 void *p;
155
156 p = mmap((void *)HIGH_ADDR, PAGE_SIZE, PROT_READ | PROT_WRITE,
157 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
158
159 ret = p == MAP_FAILED ? 0 : 1;
160
161 munmap(p, PAGE_SIZE);
162 return ret;
163 }
164
165 /*
166 * Set tagged address and read back untag mask.
167 * check if the untagged mask is expected.
168 *
169 * @return:
170 * 0: Set LAM mode successfully
171 * others: failed to set LAM
172 */
set_lam(unsigned long lam)173 static int set_lam(unsigned long lam)
174 {
175 int ret = 0;
176 uint64_t ptr = 0;
177
178 if (lam != LAM_U57_BITS && lam != LAM_NONE)
179 return -1;
180
181 /* Skip check return */
182 syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam);
183
184 /* Get untagged mask */
185 syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr);
186
187 /* Check mask returned is expected */
188 if (lam == LAM_U57_BITS)
189 ret = (ptr != ~(LAM_U57_MASK));
190 else if (lam == LAM_NONE)
191 ret = (ptr != -1ULL);
192
193 return ret;
194 }
195
get_default_tag_bits(void)196 static unsigned long get_default_tag_bits(void)
197 {
198 pid_t pid;
199 int lam = LAM_NONE;
200 int ret = 0;
201
202 pid = fork();
203 if (pid < 0) {
204 perror("Fork failed.");
205 } else if (pid == 0) {
206 /* Set LAM mode in child process */
207 if (set_lam(LAM_U57_BITS) == 0)
208 lam = LAM_U57_BITS;
209 else
210 lam = LAM_NONE;
211 exit(lam);
212 } else {
213 wait(&ret);
214 lam = WEXITSTATUS(ret);
215 }
216
217 return lam;
218 }
219
220 /*
221 * Set tagged address and read back untag mask.
222 * check if the untag mask is expected.
223 */
get_lam(void)224 static int get_lam(void)
225 {
226 uint64_t ptr = 0;
227 int ret = -1;
228 /* Get untagged mask */
229 if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1)
230 return -1;
231
232 /* Check mask returned is expected */
233 if (ptr == ~(LAM_U57_MASK))
234 ret = LAM_U57_BITS;
235 else if (ptr == -1ULL)
236 ret = LAM_NONE;
237
238
239 return ret;
240 }
241
242 /* According to LAM mode, set metadata in high bits */
set_metadata(uint64_t src,unsigned long lam)243 static uint64_t set_metadata(uint64_t src, unsigned long lam)
244 {
245 uint64_t metadata;
246
247 srand(time(NULL));
248
249 switch (lam) {
250 case LAM_U57_BITS: /* Set metadata in bits 62:57 */
251 /* Get a random non-zero value as metadata */
252 metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57;
253 metadata |= (src & ~(LAM_U57_MASK));
254 break;
255 default:
256 metadata = src;
257 break;
258 }
259
260 return metadata;
261 }
262
263 /*
264 * Set metadata in user pointer, compare new pointer with original pointer.
265 * both pointers should point to the same address.
266 *
267 * @return:
268 * 0: value on the pointer with metadata and value on original are same
269 * 1: not same.
270 */
/*
 * Write through a tagged alias of @src and verify the store landed at the
 * same memory as the untagged pointer.
 *
 * @return: 0 when both pointers address the same data, 1 otherwise.
 */
static int handle_lam_test(void *src, unsigned int lam)
{
	char *tagged;

	strcpy((char *)src, "USER POINTER");

	tagged = (char *)set_metadata((uint64_t)src, lam);
	if (tagged == (char *)src)
		return 0;

	/* Store via the tagged alias, then compare through the plain pointer. */
	strcpy(tagged, "METADATA POINTER");

	return strcmp((char *)src, tagged) != 0;
}
286
287
handle_max_bits(struct testcases * test)288 int handle_max_bits(struct testcases *test)
289 {
290 unsigned long exp_bits = get_default_tag_bits();
291 unsigned long bits = 0;
292
293 if (exp_bits != LAM_NONE)
294 exp_bits = LAM_U57_BITS;
295
296 /* Get LAM max tag bits */
297 if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1)
298 return 1;
299
300 return (exp_bits != bits);
301 }
302
303 /*
304 * Test lam feature through dereference pointer get from malloc.
305 * @return 0: Pass test. 1: Get failure during test 2: Get SIGSEGV
306 */
static int handle_malloc(struct testcases *test)
{
	char *ptr = NULL;
	int ret = 0;

	/* "later == 0": enable LAM before touching the tagged pointer. */
	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) == -1)
			return 1;

	ptr = (char *)malloc(MALLOC_LEN);
	if (ptr == NULL) {
		perror("malloc() failure\n");
		return 1;
	}

	/* Set signal handler */
	if (sigsetjmp(segv_env, 1) == 0) {
		signal(SIGSEGV, segv_handler);
		ret = handle_lam_test(ptr, test->lam);
	} else {
		ret = 2;
	}

	/* "later" case: only flag failure if the deref unexpectedly succeeded. */
	if (test->later != 0 && test->lam != 0)
		if (set_lam(test->lam) == -1 && ret == 0)
			ret = 1;

	free(ptr);

	return ret;
}
338
/*
 * mmap a page at test->addr (MAP_FIXED), enable LAM before or after the
 * mapping per test->later, then dereference it through a tagged pointer.
 * Returns 0 pass, 1 failure, 2 SIGSEGV, 3 LA57 unsupported (skip).
 */
static int handle_mmap(struct testcases *test)
{
	void *ptr;
	unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
	int ret = 0;

	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			return 1;

	ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE,
		   flags, -1, 0);
	if (ptr == MAP_FAILED) {
		/* HIGH_ADDR can only be mapped under 5-level paging. */
		if (test->addr == HIGH_ADDR)
			if (!la57_enabled())
				return 3; /* unsupport LA57 */
		return 1;
	}

	if (test->later != 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			ret = 1;

	if (ret == 0) {
		if (sigsetjmp(segv_env, 1) == 0) {
			signal(SIGSEGV, segv_handler);
			ret = handle_lam_test(ptr, test->lam);
		} else {
			ret = 2;
		}
	}

	munmap(ptr, PAGE_SIZE);
	return ret;
}
374
/*
 * Pass a tagged pointer into the kernel via uname(2) and check whether the
 * syscall accepts it under the configured LAM mode.
 */
static int handle_syscall(struct testcases *test)
{
	struct utsname unme, *pu;
	int ret = 0;

	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			return 1;

	if (sigsetjmp(segv_env, 1) == 0) {
		signal(SIGSEGV, segv_handler);
		pu = (struct utsname *)set_metadata((uint64_t)&unme, test->lam);
		ret = uname(pu);
		if (ret < 0)
			ret = 1;
	} else {
		ret = 2;
	}

	/*
	 * NOTE(review): unlike handle_malloc() this tests "!= -1" — enabling
	 * LAM late turns a clean run (ret == 0) into a failure, since the
	 * tagged syscall above was expected to be rejected.
	 */
	if (test->later != 0 && test->lam != 0)
		if (set_lam(test->lam) != -1 && ret == 0)
			ret = 1;

	return ret;
}
400
get_user_syscall(struct testcases * test)401 static int get_user_syscall(struct testcases *test)
402 {
403 uint64_t ptr_address, bitmask;
404 int fd, ret = 0;
405 void *ptr;
406
407 if (la57_enabled()) {
408 bitmask = L5_SIGN_EXT_MASK;
409 ptr_address = HIGH_ADDR;
410 } else {
411 bitmask = L4_SIGN_EXT_MASK;
412 ptr_address = LOW_ADDR;
413 }
414
415 ptr = mmap((void *)ptr_address, PAGE_SIZE, PROT_READ | PROT_WRITE,
416 MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
417
418 if (ptr == MAP_FAILED) {
419 perror("failed to map byte to pass into get_user");
420 return 1;
421 }
422
423 if (set_lam(test->lam) != 0) {
424 ret = 2;
425 goto error;
426 }
427
428 fd = memfd_create("lam_ioctl", 0);
429 if (fd == -1) {
430 munmap(ptr, PAGE_SIZE);
431 exit(EXIT_FAILURE);
432 }
433
434 switch (test->later) {
435 case GET_USER_USER:
436 /* Control group - properly tagged user pointer */
437 ptr = (void *)set_metadata((uint64_t)ptr, test->lam);
438 break;
439 case GET_USER_KERNEL_TOP:
440 /* Kernel address with top bit cleared */
441 bitmask &= (bitmask >> 1);
442 ptr = (void *)((uint64_t)ptr | bitmask);
443 break;
444 case GET_USER_KERNEL_BOT:
445 /* Kernel address with bottom sign-extension bit cleared */
446 bitmask &= (bitmask << 1);
447 ptr = (void *)((uint64_t)ptr | bitmask);
448 break;
449 case GET_USER_KERNEL:
450 /* Try to pass a kernel address */
451 ptr = (void *)((uint64_t)ptr | bitmask);
452 break;
453 default:
454 printf("Invalid test case value passed!\n");
455 break;
456 }
457
458 /*
459 * Use FIOASYNC ioctl because it utilizes get_user() internally and is
460 * very non-invasive to the system. Pass differently tagged pointers to
461 * get_user() in order to verify that valid user pointers are going
462 * through and invalid kernel/non-canonical pointers are not.
463 */
464 if (ioctl(fd, FIOASYNC, ptr) != 0)
465 ret = 1;
466
467 close(fd);
468 error:
469 munmap(ptr, PAGE_SIZE);
470 return ret;
471 }
472
/* Thin wrapper around the io_uring_setup(2) syscall. */
int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
{
	return (int)syscall(__NR_io_uring_setup, entries, p);
}
477
/* Thin wrapper around io_uring_enter(2); no sigset is passed. */
int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
{
	return (int)syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);
}
482
483 /* Init submission queue and completion queue */
mmap_io_uring(struct io_uring_params p,struct io_ring * s)484 int mmap_io_uring(struct io_uring_params p, struct io_ring *s)
485 {
486 struct io_uring_queue *sring = &s->sq_ring;
487 struct io_uring_queue *cring = &s->cq_ring;
488
489 sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int);
490 cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
491
492 if (p.features & IORING_FEAT_SINGLE_MMAP) {
493 if (cring->ring_sz > sring->ring_sz)
494 sring->ring_sz = cring->ring_sz;
495
496 cring->ring_sz = sring->ring_sz;
497 }
498
499 void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE,
500 MAP_SHARED | MAP_POPULATE, s->ring_fd,
501 IORING_OFF_SQ_RING);
502
503 if (sq_ptr == MAP_FAILED) {
504 perror("sub-queue!");
505 return 1;
506 }
507
508 void *cq_ptr = sq_ptr;
509
510 if (!(p.features & IORING_FEAT_SINGLE_MMAP)) {
511 cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE,
512 MAP_SHARED | MAP_POPULATE, s->ring_fd,
513 IORING_OFF_CQ_RING);
514 if (cq_ptr == MAP_FAILED) {
515 perror("cpl-queue!");
516 munmap(sq_ptr, sring->ring_sz);
517 return 1;
518 }
519 }
520
521 sring->head = sq_ptr + p.sq_off.head;
522 sring->tail = sq_ptr + p.sq_off.tail;
523 sring->ring_mask = sq_ptr + p.sq_off.ring_mask;
524 sring->ring_entries = sq_ptr + p.sq_off.ring_entries;
525 sring->flags = sq_ptr + p.sq_off.flags;
526 sring->array = sq_ptr + p.sq_off.array;
527
528 /* Map a queue as mem map */
529 s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
530 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
531 s->ring_fd, IORING_OFF_SQES);
532 if (s->sq_ring.queue.sqes == MAP_FAILED) {
533 munmap(sq_ptr, sring->ring_sz);
534 if (sq_ptr != cq_ptr) {
535 ksft_print_msg("failed to mmap uring queue!");
536 munmap(cq_ptr, cring->ring_sz);
537 return 1;
538 }
539 }
540
541 cring->head = cq_ptr + p.cq_off.head;
542 cring->tail = cq_ptr + p.cq_off.tail;
543 cring->ring_mask = cq_ptr + p.cq_off.ring_mask;
544 cring->ring_entries = cq_ptr + p.cq_off.ring_entries;
545 cring->queue.cqes = cq_ptr + p.cq_off.cqes;
546
547 return 0;
548 }
549
550 /* Init io_uring queues */
setup_io_uring(struct io_ring * s)551 int setup_io_uring(struct io_ring *s)
552 {
553 struct io_uring_params para;
554
555 memset(¶, 0, sizeof(para));
556 s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, ¶);
557 if (s->ring_fd < 0)
558 return 1;
559
560 return mmap_io_uring(para, s);
561 }
562
563 /*
564 * Get data from completion queue. the data buffer saved the file data
565 * return 0: success; others: error;
566 */
handle_uring_cq(struct io_ring * s)567 int handle_uring_cq(struct io_ring *s)
568 {
569 struct file_io *fi = NULL;
570 struct io_uring_queue *cring = &s->cq_ring;
571 struct io_uring_cqe *cqe;
572 unsigned int head;
573 off_t len = 0;
574
575 head = *cring->head;
576
577 do {
578 barrier();
579 if (head == *cring->tail)
580 break;
581 /* Get the entry */
582 cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask];
583 fi = (struct file_io *)cqe->user_data;
584 if (cqe->res < 0)
585 break;
586
587 int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
588
589 for (int i = 0; i < blocks; i++)
590 len += fi->iovecs[i].iov_len;
591
592 head++;
593 } while (1);
594
595 *cring->head = head;
596 barrier();
597
598 return (len != fi->file_sz);
599 }
600
601 /*
602 * Submit squeue. specify via IORING_OP_READV.
603 * the buffer need to be set metadata according to LAM mode
604 */
int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam)
{
	int file_fd = fi->file_fd;
	struct io_uring_queue *sring = &ring->sq_ring;
	unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0;
	struct io_uring_sqe *sqe;

	off_t remain = fi->file_sz;
	int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;

	/* Allocate one aligned buffer per block; store TAGGED pointers in iovecs. */
	while (remain) {
		off_t bytes = remain;
		void *buf;

		if (bytes > URING_BLOCK_SZ)
			bytes = URING_BLOCK_SZ;

		fi->iovecs[cur_block].iov_len = bytes;

		if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ))
			return 1;

		/* The kernel must untag these LAM-tagged buffer pointers. */
		fi->iovecs[cur_block].iov_base = (void *)set_metadata((uint64_t)buf, lam);
		remain -= bytes;
		cur_block++;
	}

	next_tail = *sring->tail;
	tail = next_tail;
	next_tail++;

	barrier();

	index = tail & *ring->sq_ring.ring_mask;

	/* Fill a single READV sqe covering the whole file. */
	sqe = &ring->sq_ring.queue.sqes[index];
	sqe->fd = file_fd;
	sqe->flags = 0;
	sqe->opcode = IORING_OP_READV;
	sqe->addr = (unsigned long)fi->iovecs;
	sqe->len = blocks;
	sqe->off = 0;
	sqe->user_data = (uint64_t)fi;

	sring->array[index] = index;
	tail = next_tail;

	/* Publish the new tail; barrier orders the sqe writes before it. */
	if (*sring->tail != tail) {
		*sring->tail = tail;
		barrier();
	}

	/* Submit and wait for at least one completion. */
	if (sys_uring_enter(ring->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0)
		return 1;

	return 0;
}
662
663 /*
664 * Test LAM in async I/O and io_uring, read current binery through io_uring
665 * Set metadata in pointers to iovecs buffer.
666 */
do_uring(unsigned long lam)667 int do_uring(unsigned long lam)
668 {
669 struct io_ring *ring;
670 struct file_io *fi;
671 struct stat st;
672 int ret = 1;
673 char path[PATH_MAX] = {0};
674
675 /* get current process path */
676 if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
677 return 1;
678
679 int file_fd = open(path, O_RDONLY);
680
681 if (file_fd < 0)
682 return 1;
683
684 if (fstat(file_fd, &st) < 0)
685 goto cleanup;
686
687 off_t file_sz = st.st_size;
688
689 int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
690
691 fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
692 if (!fi)
693 goto cleanup;
694
695 fi->file_sz = file_sz;
696 fi->file_fd = file_fd;
697
698 ring = malloc(sizeof(*ring));
699 if (!ring) {
700 free(fi);
701 goto cleanup;
702 }
703
704 memset(ring, 0, sizeof(struct io_ring));
705
706 if (setup_io_uring(ring))
707 goto out;
708
709 if (handle_uring_sq(ring, fi, lam))
710 goto out;
711
712 ret = handle_uring_cq(ring);
713
714 out:
715 free(ring);
716
717 for (int i = 0; i < blocks; i++) {
718 if (fi->iovecs[i].iov_base) {
719 uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base);
720
721 switch (lam) {
722 case LAM_U57_BITS: /* Clear bits 62:57 */
723 addr = (addr & ~(LAM_U57_MASK));
724 break;
725 }
726 free((void *)addr);
727 fi->iovecs[i].iov_base = NULL;
728 }
729 }
730
731 free(fi);
732 cleanup:
733 close(file_fd);
734
735 return ret;
736 }
737
/*
 * io_uring test entry: optionally enable LAM first, then run do_uring()
 * under the SIGSEGV trampoline. Returns 0 pass, 1 failure, 2 SIGSEGV.
 */
int handle_uring(struct testcases *test)
{
	int ret = 0;

	if (test->later == 0 && test->lam != 0)
		if (set_lam(test->lam) != 0)
			return 1;

	if (sigsetjmp(segv_env, 1) == 0) {
		signal(SIGSEGV, segv_handler);
		ret = do_uring(test->lam);
	} else {
		ret = 2;
	}

	return ret;
}
755
fork_test(struct testcases * test)756 static int fork_test(struct testcases *test)
757 {
758 int ret, child_ret;
759 pid_t pid;
760
761 pid = fork();
762 if (pid < 0) {
763 perror("Fork failed.");
764 ret = 1;
765 } else if (pid == 0) {
766 ret = test->test_func(test);
767 exit(ret);
768 } else {
769 wait(&child_ret);
770 ret = WEXITSTATUS(child_ret);
771 }
772
773 return ret;
774 }
775
/*
 * Verify that execve() resets LAM: enable LAM, exec ourselves with "-t 0x0"
 * (which makes main() return the current LAM mode), and expect LAM_NONE.
 */
static int handle_execve(struct testcases *test)
{
	int ret, child_ret;
	int lam = test->lam;
	pid_t pid;

	pid = fork();
	if (pid < 0) {
		perror("Fork failed.");
		ret = 1;
	} else if (pid == 0) {
		char path[PATH_MAX] = {0};

		/* Enable LAM in the child before exec'ing */
		if (set_lam(lam) != 0)
			return 1;

		/* Get current binary's path and the binary was run by execve */
		if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
			exit(-1);

		/* run binary to get LAM mode and return to parent process */
		if (execlp(path, path, "-t 0x0", NULL) < 0) {
			perror("error on exec");
			exit(-1);
		}
	} else {
		wait(&child_ret);
		ret = WEXITSTATUS(child_ret);
		/* exec'd child reports its LAM mode; it must be LAM_NONE. */
		if (ret != LAM_NONE)
			return 1;
	}

	return 0;
}
811
/*
 * Verify fork() inheritance: enable LAM in the parent, then check that a
 * forked child reads back the same LAM mode.
 */
static int handle_inheritance(struct testcases *test)
{
	int ret, child_ret;
	int lam = test->lam;
	pid_t pid;

	/* Set LAM mode in parent process */
	if (set_lam(lam) != 0)
		return 1;

	pid = fork();
	if (pid < 0) {
		perror("Fork failed.");
		return 1;
	} else if (pid == 0) {
		/* Child reports the LAM mode it inherited from the parent. */
		int child_lam = get_lam();

		exit(child_lam);
	} else {
		wait(&child_ret);
		ret = WEXITSTATUS(child_ret);

		if (lam != ret)
			return 1;
	}

	return 0;
}
841
/* clone() entry: report the thread's current LAM mode as its exit code. */
static int thread_fn_get_lam(void *arg)
{
	return get_lam();
}
846
/* clone() entry: try to enable the test's LAM mode from inside the thread. */
static int thread_fn_set_lam(void *arg)
{
	struct testcases *test = arg;

	return set_lam(test->lam);
}
853
/*
 * Thread inheritance test: a CLONE_VM thread must see the same LAM mode as
 * its creator. In the "later" variant, enabling LAM after the thread was
 * created is expected to fail (negative case).
 */
static int handle_thread(struct testcases *test)
{
	char stack[STACK_SIZE];
	int ret, child_ret;
	int lam = 0;
	pid_t pid;

	/* Set LAM mode in parent process */
	if (!test->later) {
		lam = test->lam;
		if (set_lam(lam) != 0)
			return 1;
	}

	pid = clone(thread_fn_get_lam, stack + STACK_SIZE,
		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL);
	if (pid < 0) {
		perror("Clone failed.");
		return 1;
	}

	waitpid(pid, &child_ret, 0);
	ret = WEXITSTATUS(child_ret);

	/* Thread must report the same mode that was set before its creation. */
	if (lam != ret)
		return 1;

	/* "later": enabling LAM with another thread alive should fail. */
	if (test->later) {
		if (set_lam(test->lam) != 0)
			return 1;
	}

	return 0;
}
888
/*
 * Negative test: a CLONE_VM thread tries to enable LAM itself; the kernel
 * is expected to refuse, so the thread's set_lam() result differs from lam.
 */
static int handle_thread_enable(struct testcases *test)
{
	char stack[STACK_SIZE];
	int ret, child_ret;
	int lam = test->lam;
	pid_t pid;

	pid = clone(thread_fn_set_lam, stack + STACK_SIZE,
		    SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test);
	if (pid < 0) {
		perror("Clone failed.");
		return 1;
	}

	waitpid(pid, &child_ret, 0);
	ret = WEXITSTATUS(child_ret);

	/* Exit status is set_lam()'s return; matching 'lam' means failure path. */
	if (lam != ret)
		return 1;

	return 0;
}
run_test(struct testcases * test,int count)911 static void run_test(struct testcases *test, int count)
912 {
913 int i, ret = 0;
914
915 for (i = 0; i < count; i++) {
916 struct testcases *t = test + i;
917
918 /* fork a process to run test case */
919 tests_cnt++;
920 ret = fork_test(t);
921
922 /* return 3 is not support LA57, the case should be skipped */
923 if (ret == 3) {
924 ksft_test_result_skip("%s", t->msg);
925 continue;
926 }
927
928 if (ret != 0)
929 ret = (t->expected == ret);
930 else
931 ret = !(t->expected);
932
933 ksft_test_result(ret, "%s", t->msg);
934 }
935 }
936
/* io_uring cases: tagged iovec buffers with LAM on (pass) and off (fail). */
static struct testcases uring_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_uring,
		.msg = "URING: LAM_U57. Dereferencing pointer with metadata\n",
	},
	{
		.later = 1,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_uring,
		.msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
};
952
/* malloc cases: tagged heap pointer with LAM on (pass) and off (SIGSEGV). */
static struct testcases malloc_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_malloc,
		.msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n",
	},
	{
		.later = 1,
		.expected = 2,
		.lam = LAM_U57_BITS,
		.test_func = handle_malloc,
		.msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
};
968
/* ARCH_GET_MAX_TAG_BITS sanity check. */
static struct testcases bits_cases[] = {
	{
		.test_func = handle_max_bits,
		.msg = "BITS: Check default tag bits\n",
	},
};
975
/* Syscall cases: uname() with tagged pointers, plus get_user() probes. */
static struct testcases syscall_cases[] = {
	{
		.later = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_syscall,
		.msg = "SYSCALL: LAM_U57. syscall with metadata\n",
	},
	{
		.later = 1,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_syscall,
		.msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
	},
	{
		.later = GET_USER_USER,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER: get_user() and pass a properly tagged user pointer.\n",
	},
	{
		.later = GET_USER_KERNEL_TOP,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER:[Negative] get_user() with a kernel pointer and the top bit cleared.\n",
	},
	{
		.later = GET_USER_KERNEL_BOT,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER:[Negative] get_user() with a kernel pointer and the bottom sign-extension bit cleared.\n",
	},
	{
		.later = GET_USER_KERNEL,
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = get_user_syscall,
		.msg = "GET_USER:[Negative] get_user() and pass a kernel pointer.\n",
	},
};
1018
/* mmap cases: LAM enabled before/after fixed mappings at low/high addresses. */
static struct testcases mmap_cases[] = {
	{
		.later = 1,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = HIGH_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First mmap high address, then set LAM_U57.\n",
	},
	{
		.later = 0,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = HIGH_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First LAM_U57, then High address.\n",
	},
	{
		.later = 0,
		.expected = 0,
		.lam = LAM_U57_BITS,
		.addr = LOW_ADDR,
		.test_func = handle_mmap,
		.msg = "MMAP: First LAM_U57, then Low address.\n",
	},
};
1045
/* Inheritance cases: fork, clone threads, and execve LAM-state propagation. */
static struct testcases inheritance_cases[] = {
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_inheritance,
		.msg = "FORK: LAM_U57, child process should get LAM mode same as parent\n",
	},
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread,
		.msg = "THREAD: LAM_U57, child thread should get LAM mode same as parent\n",
	},
	{
		.expected = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread_enable,
		.msg = "THREAD: [NEGATIVE] Enable LAM in child.\n",
	},
	{
		.expected = 1,
		.later = 1,
		.lam = LAM_U57_BITS,
		.test_func = handle_thread,
		.msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n",
	},
	{
		.expected = 0,
		.lam = LAM_U57_BITS,
		.test_func = handle_execve,
		.msg = "EXECVE: LAM_U57, child process should get disabled LAM mode\n",
	},
};
1079
cmd_help(void)1080 static void cmd_help(void)
1081 {
1082 printf("usage: lam [-h] [-t test list]\n");
1083 printf("\t-t test list: run tests specified in the test list, default:0x%x\n", TEST_MASK);
1084 printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit;\n");
1085 printf("\t-h: help\n");
1086 }
1087
1088 /* Check for file existence */
/* Return 1 when fileName exists (stat() succeeds), 0 otherwise. */
uint8_t file_Exists(const char *fileName)
{
	struct stat sb;

	return stat(fileName, &sb) == 0 ? 1 : 0;
}
1097
1098 /* Sysfs idxd files */
/* Sysfs idxd files: shell commands that configure a shared DSA work queue. */
const char *dsa_configs[] = {
	"echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id",
	"echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode",
	"echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority",
	"echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size",
	"echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold",
	"echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type",
	"echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name",
	"echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id",
	"echo dsa0 > /sys/bus/dsa/drivers/idxd/bind",
	/* bind files and devices, generated a device file in /dev */
	"echo wq0.1 > /sys/bus/dsa/drivers/user/bind",
};

/* DSA device file created once the work queue above is bound. */
const char *dsaDeviceFile = "/dev/dsa/wq0.1";
/* sysfs file reporting whether PASID support is enabled for dsa0. */
const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled";
1117
1118 /*
1119 * DSA depends on kernel cmdline "intel_iommu=on,sm_on"
1120 * return pasid_enabled (0: disable 1:enable)
1121 */
Check_DSA_Kernel_Setting(void)1122 int Check_DSA_Kernel_Setting(void)
1123 {
1124 char command[256] = "";
1125 char buf[256] = "";
1126 char *ptr;
1127 int rv = -1;
1128
1129 snprintf(command, sizeof(command) - 1, "cat %s", dsaPasidEnable);
1130
1131 FILE *cmd = popen(command, "r");
1132
1133 if (cmd) {
1134 while (fgets(buf, sizeof(buf) - 1, cmd) != NULL);
1135
1136 pclose(cmd);
1137 rv = strtol(buf, &ptr, 16);
1138 }
1139
1140 return rv;
1141 }
1142
1143 /*
1144 * Config DSA's sysfs files as shared DSA's WQ.
1145 * Generated a device file /dev/dsa/wq0.1
1146 * Return: 0 OK; 1 Failed; 3 Skip(SVA disabled).
1147 */
/*
 * Config DSA's sysfs files as shared DSA's WQ.
 * Generated a device file /dev/dsa/wq0.1
 * Return: 0 OK; 1 Failed; 3 Skip(SVA disabled).
 */
int Dsa_Init_Sysfs(void)
{
	uint len = ARRAY_SIZE(dsa_configs);
	const char **p = dsa_configs;

	/* Already configured from a previous run: nothing to do. */
	if (file_Exists(dsaDeviceFile) == 1)
		return 0;

	/* check the idxd driver */
	if (file_Exists(dsaPasidEnable) != 1) {
		printf("Please make sure idxd driver was loaded\n");
		return 3;
	}

	/* Check SVA feature */
	if (Check_DSA_Kernel_Setting() != 1) {
		printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n");
		return 3;
	}

	/* Run each configuration command; abort on the first failure. */
	for (int i = 0; i < len; i++) {
		if (system(p[i]))
			return 1;
	}

	/* After config, /dev/dsa/wq0.1 should be generated */
	return (file_Exists(dsaDeviceFile) != 1);
}
1177
1178 /*
1179 * Open DSA device file, triger API: iommu_sva_alloc_pasid
1180 */
/*
 * Open DSA device file and mmap its portal, which triggers the kernel API
 * iommu_sva_alloc_pasid. Returns the mapped portal or MAP_FAILED.
 */
void *allocate_dsa_pasid(void)
{
	int fd;
	void *wq;

	fd = open(dsaDeviceFile, O_RDWR);
	if (fd < 0) {
		perror("open");
		return MAP_FAILED;
	}

	/* The portal mapping stays valid after the fd is closed. */
	wq = mmap(NULL, 0x1000, PROT_WRITE,
		  MAP_SHARED | MAP_POPULATE, fd, 0);
	close(fd);
	if (wq == MAP_FAILED)
		perror("mmap");

	return wq;
}
1200
/* Opt this process into tagged-address SVA via ARCH_FORCE_TAGGED_SVA. */
int set_force_svm(void)
{
	int ret = 0;

	ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA);

	return ret;
}
1209
/*
 * Execute the LAM / force-SVA / PASID-alloc steps in the order encoded by
 * test->cmd (one PAS_CMD nibble per iteration) and verify the kernel
 * accepts only the legal orderings. 'runed' tracks which steps already ran;
 * repeating a step is treated as an error.
 */
int handle_pasid(struct testcases *test)
{
	uint tmp = test->cmd;
	uint runed = 0x0;
	int ret = 0;
	void *wq = NULL;

	ret = Dsa_Init_Sysfs();
	if (ret != 0)
		return ret;

	/* Three nibbles in cmd -> three sequential steps. */
	for (int i = 0; i < 3; i++) {
		int err = 0;

		if (tmp & 0x1) {
			/* run set lam mode*/
			if ((runed & 0x1) == 0) {
				err = set_lam(LAM_U57_BITS);
				runed = runed | 0x1;
			} else
				err = 1;
		} else if (tmp & 0x4) {
			/* run force svm */
			if ((runed & 0x4) == 0) {
				err = set_force_svm();
				runed = runed | 0x4;
			} else
				err = 1;
		} else if (tmp & 0x2) {
			/* run allocate pasid */
			if ((runed & 0x2) == 0) {
				runed = runed | 0x2;
				wq = allocate_dsa_pasid();
				if (wq == MAP_FAILED)
					err = 1;
			} else
				err = 1;
		}

		ret = ret + err;
		if (ret > 0)
			break;

		/* Advance to the next PAS_CMD nibble. */
		tmp = tmp >> 4;
	}

	if (wq != MAP_FAILED && wq != NULL)
		if (munmap(wq, 0x1000))
			printf("munmap failed %d\n", errno);

	/* All three steps must have run exactly once. */
	if (runed != 0x7)
		ret = 1;

	return (ret != 0);
}
1265
1266 /*
1267 * Pasid test depends on idxd and SVA, kernel should enable iommu and sm.
1268 * command line(intel_iommu=on,sm_on)
1269 */
/* PASID ordering cases: only sequences ending in LAM or with SVA before PASID pass. */
static struct testcases pasid_cases[] = {
	{
		.expected = 1,
		.cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute LAM, SVA, PASID in sequence\n",
	},
	{
		.expected = 1,
		.cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute PASID, SVA, LAM in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute SVA, LAM, PASID in sequence\n",
	},
	{
		.expected = 0,
		.cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT),
		.test_func = handle_pasid,
		.msg = "PASID: Execute SVA, PASID, LAM in sequence\n",
	},
};
1308
/*
 * Entry point: parse -t <hex mask> / -h, skip entirely when LAM is absent,
 * and run the selected test groups. "-t 0x0" is a special mode used by the
 * execve test: it just returns the current LAM mode as the exit status.
 */
int main(int argc, char **argv)
{
	int c = 0;
	unsigned int tests = TEST_MASK;

	tests_cnt = 0;

	if (!lam_is_available())
		return KSFT_SKIP;

	while ((c = getopt(argc, argv, "ht:")) != -1) {
		switch (c) {
		case 't':
			tests = strtoul(optarg, NULL, 16);
			/* A non-zero mask must select at least one known test. */
			if (tests && !(tests & TEST_MASK)) {
				ksft_print_msg("Invalid argument!\n");
				return -1;
			}
			break;
		case 'h':
			cmd_help();
			return 0;
		default:
			ksft_print_msg("Invalid argument\n");
			return -1;
		}
	}

	/*
	 * When tests is 0, it is not a real test case;
	 * the option used by test case(execve) to check the lam mode in
	 * process generated by execve, the process read back lam mode and
	 * check with lam mode in parent process.
	 */
	if (!tests)
		return (get_lam());

	/* Run test cases */
	if (tests & FUNC_MALLOC)
		run_test(malloc_cases, ARRAY_SIZE(malloc_cases));

	if (tests & FUNC_BITS)
		run_test(bits_cases, ARRAY_SIZE(bits_cases));

	if (tests & FUNC_MMAP)
		run_test(mmap_cases, ARRAY_SIZE(mmap_cases));

	if (tests & FUNC_SYSCALL)
		run_test(syscall_cases, ARRAY_SIZE(syscall_cases));

	if (tests & FUNC_URING)
		run_test(uring_cases, ARRAY_SIZE(uring_cases));

	if (tests & FUNC_INHERITE)
		run_test(inheritance_cases, ARRAY_SIZE(inheritance_cases));

	if (tests & FUNC_PASID)
		run_test(pasid_cases, ARRAY_SIZE(pasid_cases));

	/* Plan is only known after the selected groups have incremented tests_cnt. */
	ksft_set_plan(tests_cnt);

	ksft_exit_pass();
}
1372